Skip to content

Commit 732c916

Browse files
committed
!fixup update after rebase, check values to ignore.
1 parent 3181ed2 commit 732c916

File tree

6 files changed

+82
-90
lines changed

6 files changed

+82
-90
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4876,7 +4876,8 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
48764876
/// Returns the register usage for each VF in \p VFs.
48774877
static SmallVector<LoopVectorizationCostModel::RegisterUsage, 8>
48784878
calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
4879-
const TargetTransformInfo &TTI) {
4879+
const TargetTransformInfo &TTI,
4880+
const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
48804881
// This function calculates the register usage by measuring the highest number
48814882
// of values that are alive at a single location. Obviously, this is a very
48824883
// rough estimation. We scan the loop in a topological order in order and
@@ -4989,10 +4990,19 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
49894990
for (VPRecipeBase *ToRemove : List)
49904991
OpenIntervals.erase(ToRemove);
49914992

4992-
// Ignore recipes that are never used within the loop.
4993+
// Ignore recipes that are never used within the loop and do not have side
4994+
// effects.
49934995
if (!Ends.count(R) && !R->mayHaveSideEffects())
49944996
continue;
49954997

4998+
// Skip recipes for ignored values.
4999+
// TODO: Should mark recipes for ephemeral values that cannot be removed
5000+
// explictly in VPlan.
5001+
if (isa<VPSingleDefRecipe>(R) &&
5002+
ValuesToIgnore.contains(
5003+
cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
5004+
continue;
5005+
49965006
// For each VF find the maximum usage of registers.
49975007
for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
49985008
// Count the number of registers used, per register class, given all open
@@ -5147,7 +5157,8 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
51475157
return 1;
51485158
}
51495159

5150-
RegisterUsage R = ::calculateRegisterUsage(Plan, {VF}, TTI)[0];
5160+
RegisterUsage R =
5161+
::calculateRegisterUsage(Plan, {VF}, TTI, ValuesToIgnore)[0];
51515162
// We divide by these constants so assume that we have at least one
51525163
// instruction that uses at least one register.
51535164
for (auto &Pair : R.MaxLocalUsers) {

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll

Lines changed: 21 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -3240,10 +3240,10 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum,
32403240
; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX58_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX58]], align 4
32413241
; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX67_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX67]], align 4
32423242
; CHECK-INTERLEAVED-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
3243-
; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 8
3243+
; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
32443244
; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
32453245
; CHECK-INTERLEAVED: vector.ph:
3246-
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 8
3246+
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
32473247
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
32483248
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[ARRAYIDX67_PROMOTED]], i32 0
32493249
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[ARRAYIDX58_PROMOTED]], i32 0
@@ -3257,42 +3257,19 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum,
32573257
; CHECK-INTERLEAVED: vector.body:
32583258
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
32593259
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP64:%.*]], [[VECTOR_BODY]] ]
3260-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP65:%.*]], [[VECTOR_BODY]] ]
32613260
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ [[TMP1]], [[VECTOR_PH]] ], [ [[TMP58:%.*]], [[VECTOR_BODY]] ]
3262-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP59:%.*]], [[VECTOR_BODY]] ]
32633261
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[VECTOR_BODY]] ]
3264-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP53:%.*]], [[VECTOR_BODY]] ]
32653262
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ [[TMP3]], [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[VECTOR_BODY]] ]
3266-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[VECTOR_BODY]] ]
32673263
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI8:%.*]] = phi <4 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP40:%.*]], [[VECTOR_BODY]] ]
3268-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ]
32693264
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ]
3270-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI11:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
32713265
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ]
3272-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI13:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ]
32733266
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI14:%.*]] = phi <4 x i32> [ [[TMP7]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
3274-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI15:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ]
3275-
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 4
32763267
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]]
32773268
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP9]], i32 0
3278-
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP9]], i32 4
3279-
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
3280-
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
3281-
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
3269+
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
32823270
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = zext <4 x i8> [[WIDE_LOAD16]] to <4 x i32>
3283-
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = shl nsw i64 [[INDEX]], 3
3284-
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = shl nsw i64 [[TMP8]], 3
3285-
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP14]]
3271+
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = shl nsw i64 [[INDEX]], 3
32863272
; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP15]]
3287-
; CHECK-INTERLEAVED-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[TMP16]], align 1
3288-
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
3289-
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC17:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 1, i32 9, i32 17, i32 25>
3290-
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC18:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 2, i32 10, i32 18, i32 26>
3291-
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC19:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 3, i32 11, i32 19, i32 27>
3292-
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC20:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 4, i32 12, i32 20, i32 28>
3293-
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC21:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 5, i32 13, i32 21, i32 29>
3294-
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC22:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 6, i32 14, i32 22, i32 30>
3295-
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC23:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 7, i32 15, i32 23, i32 31>
32963273
; CHECK-INTERLEAVED-NEXT: [[WIDE_VEC24:%.*]] = load <32 x i8>, ptr [[TMP17]], align 1
32973274
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC25:%.*]] = shufflevector <32 x i8> [[WIDE_VEC24]], <32 x i8> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
32983275
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC26:%.*]] = shufflevector <32 x i8> [[WIDE_VEC24]], <32 x i8> poison, <4 x i32> <i32 1, i32 9, i32 17, i32 25>
@@ -3302,74 +3279,42 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum,
33023279
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC30:%.*]] = shufflevector <32 x i8> [[WIDE_VEC24]], <32 x i8> poison, <4 x i32> <i32 5, i32 13, i32 21, i32 29>
33033280
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC31:%.*]] = shufflevector <32 x i8> [[WIDE_VEC24]], <32 x i8> poison, <4 x i32> <i32 6, i32 14, i32 22, i32 30>
33043281
; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC32:%.*]] = shufflevector <32 x i8> [[WIDE_VEC24]], <32 x i8> poison, <4 x i32> <i32 7, i32 15, i32 23, i32 31>
3305-
; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = sext <4 x i8> [[STRIDED_VEC]] to <4 x i32>
33063282
; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = sext <4 x i8> [[STRIDED_VEC25]] to <4 x i32>
3307-
; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = mul nsw <4 x i32> [[TMP18]], [[TMP12]]
33083283
; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = mul nsw <4 x i32> [[TMP19]], [[TMP13]]
3309-
; CHECK-INTERLEAVED-NEXT: [[TMP22]] = add <4 x i32> [[TMP20]], [[VEC_PHI14]]
3310-
; CHECK-INTERLEAVED-NEXT: [[TMP23]] = add <4 x i32> [[TMP21]], [[VEC_PHI15]]
3311-
; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = sext <4 x i8> [[STRIDED_VEC17]] to <4 x i32>
3284+
; CHECK-INTERLEAVED-NEXT: [[TMP22]] = add <4 x i32> [[TMP21]], [[VEC_PHI14]]
33123285
; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sext <4 x i8> [[STRIDED_VEC26]] to <4 x i32>
3313-
; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = mul nsw <4 x i32> [[TMP24]], [[TMP12]]
33143286
; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = mul nsw <4 x i32> [[TMP25]], [[TMP13]]
3315-
; CHECK-INTERLEAVED-NEXT: [[TMP28]] = add <4 x i32> [[TMP26]], [[VEC_PHI12]]
3316-
; CHECK-INTERLEAVED-NEXT: [[TMP29]] = add <4 x i32> [[TMP27]], [[VEC_PHI13]]
3317-
; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = sext <4 x i8> [[STRIDED_VEC18]] to <4 x i32>
3287+
; CHECK-INTERLEAVED-NEXT: [[TMP28]] = add <4 x i32> [[TMP27]], [[VEC_PHI12]]
33183288
; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = sext <4 x i8> [[STRIDED_VEC27]] to <4 x i32>
3319-
; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = mul nsw <4 x i32> [[TMP30]], [[TMP12]]
33203289
; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = mul nsw <4 x i32> [[TMP31]], [[TMP13]]
3321-
; CHECK-INTERLEAVED-NEXT: [[TMP34]] = add <4 x i32> [[TMP32]], [[VEC_PHI10]]
3322-
; CHECK-INTERLEAVED-NEXT: [[TMP35]] = add <4 x i32> [[TMP33]], [[VEC_PHI11]]
3323-
; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = sext <4 x i8> [[STRIDED_VEC19]] to <4 x i32>
3290+
; CHECK-INTERLEAVED-NEXT: [[TMP34]] = add <4 x i32> [[TMP33]], [[VEC_PHI10]]
33243291
; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = sext <4 x i8> [[STRIDED_VEC28]] to <4 x i32>
3325-
; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = mul nsw <4 x i32> [[TMP36]], [[TMP12]]
33263292
; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = mul nsw <4 x i32> [[TMP37]], [[TMP13]]
3327-
; CHECK-INTERLEAVED-NEXT: [[TMP40]] = add <4 x i32> [[TMP38]], [[VEC_PHI8]]
3328-
; CHECK-INTERLEAVED-NEXT: [[TMP41]] = add <4 x i32> [[TMP39]], [[VEC_PHI9]]
3329-
; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = sext <4 x i8> [[STRIDED_VEC20]] to <4 x i32>
3293+
; CHECK-INTERLEAVED-NEXT: [[TMP40]] = add <4 x i32> [[TMP39]], [[VEC_PHI8]]
33303294
; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = sext <4 x i8> [[STRIDED_VEC29]] to <4 x i32>
3331-
; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = mul nsw <4 x i32> [[TMP42]], [[TMP12]]
33323295
; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP13]]
3333-
; CHECK-INTERLEAVED-NEXT: [[TMP46]] = add <4 x i32> [[TMP44]], [[VEC_PHI6]]
3334-
; CHECK-INTERLEAVED-NEXT: [[TMP47]] = add <4 x i32> [[TMP45]], [[VEC_PHI7]]
3335-
; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = sext <4 x i8> [[STRIDED_VEC21]] to <4 x i32>
3296+
; CHECK-INTERLEAVED-NEXT: [[TMP46]] = add <4 x i32> [[TMP45]], [[VEC_PHI6]]
33363297
; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = sext <4 x i8> [[STRIDED_VEC30]] to <4 x i32>
3337-
; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = mul nsw <4 x i32> [[TMP48]], [[TMP12]]
33383298
; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = mul nsw <4 x i32> [[TMP49]], [[TMP13]]
3339-
; CHECK-INTERLEAVED-NEXT: [[TMP52]] = add <4 x i32> [[TMP50]], [[VEC_PHI4]]
3340-
; CHECK-INTERLEAVED-NEXT: [[TMP53]] = add <4 x i32> [[TMP51]], [[VEC_PHI5]]
3341-
; CHECK-INTERLEAVED-NEXT: [[TMP54:%.*]] = sext <4 x i8> [[STRIDED_VEC22]] to <4 x i32>
3299+
; CHECK-INTERLEAVED-NEXT: [[TMP52]] = add <4 x i32> [[TMP51]], [[VEC_PHI4]]
33423300
; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = sext <4 x i8> [[STRIDED_VEC31]] to <4 x i32>
3343-
; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = mul nsw <4 x i32> [[TMP54]], [[TMP12]]
33443301
; CHECK-INTERLEAVED-NEXT: [[TMP57:%.*]] = mul nsw <4 x i32> [[TMP55]], [[TMP13]]
3345-
; CHECK-INTERLEAVED-NEXT: [[TMP58]] = add <4 x i32> [[TMP56]], [[VEC_PHI2]]
3346-
; CHECK-INTERLEAVED-NEXT: [[TMP59]] = add <4 x i32> [[TMP57]], [[VEC_PHI3]]
3347-
; CHECK-INTERLEAVED-NEXT: [[TMP60:%.*]] = sext <4 x i8> [[STRIDED_VEC23]] to <4 x i32>
3302+
; CHECK-INTERLEAVED-NEXT: [[TMP58]] = add <4 x i32> [[TMP57]], [[VEC_PHI2]]
33483303
; CHECK-INTERLEAVED-NEXT: [[TMP61:%.*]] = sext <4 x i8> [[STRIDED_VEC32]] to <4 x i32>
3349-
; CHECK-INTERLEAVED-NEXT: [[TMP62:%.*]] = mul nsw <4 x i32> [[TMP60]], [[TMP12]]
33503304
; CHECK-INTERLEAVED-NEXT: [[TMP63:%.*]] = mul nsw <4 x i32> [[TMP61]], [[TMP13]]
3351-
; CHECK-INTERLEAVED-NEXT: [[TMP64]] = add <4 x i32> [[TMP62]], [[VEC_PHI]]
3352-
; CHECK-INTERLEAVED-NEXT: [[TMP65]] = add <4 x i32> [[TMP63]], [[VEC_PHI1]]
3353-
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
3305+
; CHECK-INTERLEAVED-NEXT: [[TMP64]] = add <4 x i32> [[TMP63]], [[VEC_PHI]]
3306+
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
33543307
; CHECK-INTERLEAVED-NEXT: [[TMP66:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
33553308
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP66]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
33563309
; CHECK-INTERLEAVED: middle.block:
3357-
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP65]], [[TMP64]]
3358-
; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
3359-
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX33:%.*]] = add <4 x i32> [[TMP59]], [[TMP58]]
3360-
; CHECK-INTERLEAVED-NEXT: [[TMP68:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX33]])
3361-
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX34:%.*]] = add <4 x i32> [[TMP53]], [[TMP52]]
3362-
; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX34]])
3363-
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX35:%.*]] = add <4 x i32> [[TMP47]], [[TMP46]]
3364-
; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX35]])
3365-
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX36:%.*]] = add <4 x i32> [[TMP41]], [[TMP40]]
3366-
; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX36]])
3367-
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX37:%.*]] = add <4 x i32> [[TMP35]], [[TMP34]]
3368-
; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX37]])
3369-
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX38:%.*]] = add <4 x i32> [[TMP29]], [[TMP28]]
3370-
; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]])
3371-
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX39:%.*]] = add <4 x i32> [[TMP23]], [[TMP22]]
3372-
; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX39]])
3310+
; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP64]])
3311+
; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP58]])
3312+
; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP52]])
3313+
; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]])
3314+
; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP40]])
3315+
; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP34]])
3316+
; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP28]])
3317+
; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP22]])
33733318
; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
33743319
; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
33753320
; CHECK-INTERLEAVED: scalar.ph:

llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,9 @@ define void @load_and_compare_only_used_by_assume(ptr %a, ptr noalias %b) {
4545
; CHECK-LABEL: LV: Checking a loop in 'load_and_compare_only_used_by_assume'
4646
; CHECK: LV(REG): VF = vscale x 4
4747
; CHECK-NEXT: LV(REG): Found max usage: 2 item
48-
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
48+
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 3 registers
4949
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 1 registers
50-
; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
50+
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
5151

5252
entry:
5353
br label %loop

0 commit comments

Comments
 (0)