diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 333e50ee98418..cabbabafcf520 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6139,11 +6139,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
 
     // First-order recurrences are replaced by vector shuffles inside the loop.
     if (VF.isVector() && Legal->isFixedOrderRecurrence(Phi)) {
-      // For <vscale x 1 x i64>, if vscale = 1 we are unable to extract the
-      // penultimate value of the recurrence.
-      // TODO: Consider vscale_range info.
-      if (VF.isScalable() && VF.getKnownMinValue() == 1)
-        return InstructionCost::getInvalid();
       SmallVector<int> Mask(VF.getKnownMinValue());
       std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
       return TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
@@ -8561,13 +8556,17 @@ addUsersInExitBlocks(VPlan &Plan,
 /// users in the original exit block using the VPIRInstruction wrapping to the
 /// LCSSA phi.
 static void addExitUsersForFirstOrderRecurrences(
-    VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix) {
+    VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix, VFRange &Range) {
   VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
   auto *ScalarPHVPBB = Plan.getScalarPreheader();
   auto *MiddleVPBB = Plan.getMiddleBlock();
   VPBuilder ScalarPHBuilder(ScalarPHVPBB);
   VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
 
+  auto IsScalableOne = [](ElementCount VF) -> bool {
+    return VF == ElementCount::getScalable(1);
+  };
+
   for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) {
     auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi);
     if (!FOR)
@@ -8649,6 +8648,13 @@ static void addExitUsersForFirstOrderRecurrences(
     for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
       if (ExitIRI->getOperand(0) != FOR)
         continue;
+      // For VF vscale x 1, if vscale = 1, we are unable to extract the
+      // penultimate value of the recurrence. Instead, we can extract the last
+      // element directly from VPInstruction::FirstOrderRecurrenceSplice.
+      // TODO: Consider vscale_range info and UF.
+      if (LoopVectorizationPlanner::getDecisionAndClampRange(IsScalableOne,
+                                                             Range))
+        return;
       VPValue *PenultimateElement = MiddleBuilder.createNaryOp(
           VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue()},
           {}, "vector.recur.extract.for.phi");
@@ -8864,7 +8870,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
   SetVector<VPIRInstruction *> ExitUsersToFix =
       collectUsersInLatchExitBlock(*Plan);
-  addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
+  addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix, Range);
   addUsersInExitBlocks(*Plan, ExitUsersToFix);
 
   // ---------------------------------------------------------------------------
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 62b99d98a2b5e..b877e5407b27b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3850,9 +3850,6 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
   if (VF.isScalar())
     return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
 
-  if (VF.isScalable() && VF.getKnownMinValue() == 1)
-    return InstructionCost::getInvalid();
-
   return 0;
 }
 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
index 1d12f11b20e18..dd912651dad05 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
@@ -1,37 +1,50 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+; RUN: opt -p loop-vectorize -scalable-vectorization=on -force-vector-width=1 -S %s | FileCheck %s
 
 target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
 target triple = "riscv64-unknown-linux-gnu"
 
-; Make sure we do not pick <vscale x 1> as VF for a loop with a
-; first-order recurrence.
 define i64 @pr97452_scalable_vf1_for(ptr %src) #0 {
 ; CHECK-LABEL: define i64 @pr97452_scalable_vf1_for(
 ; CHECK-SAME: ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP3]], 1
+; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 0, i32 [[TMP4]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, %[[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <vscale x 1 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
-; CHECK-NEXT:    [[WIDE_LOAD1]] = load <4 x i64>, ptr [[TMP5]], align 8
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD]] = load <vscale x 1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> [[VECTOR_RECUR]], <vscale x 1 x i64> [[WIDE_LOAD]], i32 -1)
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[WIDE_LOAD1]], i32 2
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[WIDE_LOAD1]], i32 3
-; CHECK-NEXT:    br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <vscale x 1 x i64> [[TMP7]], i32 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP13:%.*]] = sub i32 [[TMP12]], 1
+; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 1 x i64> [[WIDE_LOAD]], i32 [[TMP13]]
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 23, [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
@@ -39,7 +52,7 @@ define i64 @pr97452_scalable_vf1_for(ptr %src) #0 {
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 22
 ; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RES:%.*]] = phi i64 [ [[SCALAR_RECUR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i64 [[RES]]
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll
index 98a942a501077..b20d59bd5760e 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll
@@ -8,17 +8,51 @@ define i64 @pr97452_scalable_vf1_for_live_out(ptr %src) {
 ; CHECK-LABEL: define i64 @pr97452_scalable_vf1_for_live_out(
 ; CHECK-SAME: ptr [[SRC:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP3]], 1
+; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 0, i32 [[TMP4]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <vscale x 1 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD]] = load <vscale x 1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> [[VECTOR_RECUR]], <vscale x 1 x i64> [[WIDE_LOAD]], i32 -1)
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <vscale x 1 x i64> [[TMP7]], i32 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP13:%.*]] = sub i32 [[TMP12]], 1
+; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 1 x i64> [[WIDE_LOAD]], i32 [[TMP13]]
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 23, [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[FOR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[L:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
 ; CHECK-NEXT:    [[L]] = load i64, ptr [[GEP]], align 8
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 22
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i64 [[RES]]
 ;
 entry:
@@ -43,17 +77,51 @@ define void @pr97452_scalable_vf1_for_no_live_out(ptr %src, ptr noalias %dst) {
 ; CHECK-LABEL: define void @pr97452_scalable_vf1_for_no_live_out(
 ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP3]], 1
+; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 0, i32 [[TMP4]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <vscale x 1 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD]] = load <vscale x 1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> [[VECTOR_RECUR]], <vscale x 1 x i64> [[WIDE_LOAD]], i32 -1)
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
+; CHECK-NEXT:    store <vscale x 1 x i64> [[TMP7]], ptr [[TMP9]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 [[TMP11]], 1
+; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 1 x i64> [[WIDE_LOAD]], i32 [[TMP12]]
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 23, [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[FOR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[L:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
 ; CHECK-NEXT:    [[L]] = load i64, ptr [[GEP]], align 8
 ; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
 ; CHECK-NEXT:    store i64 [[FOR]], ptr [[GEP_DST]], align 8
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 22
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -74,3 +142,11 @@ loop:
 exit:
   ret void
 }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+;.
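
Note on why extracting the last element of the splice is safe here: VPInstruction::FirstOrderRecurrenceSplice lowers to llvm.vector.splice(prev, cur, -1), whose result is the last lane of prev followed by the first VL-1 lanes of cur. Its last lane is therefore the recurrence's penultimate value for any vector length: lane VL-2 of cur when VL >= 2, and the last lane of prev when VL == 1, exactly the case where ExtractPenultimateElement has no valid lane to take. A minimal standalone IR sketch of that lane arithmetic follows; @splice_last_lane_demo is illustrative only and not part of the patch:

; Extract the last lane of splice(prev, cur, -1). With vscale = 1, a
; <vscale x 1 x i64> holds a single lane, so this returns the last lane
; of %prev, i.e. the recurrence value from one vector iteration back.
define i64 @splice_last_lane_demo(<vscale x 1 x i64> %prev, <vscale x 1 x i64> %cur) {
  %splice = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> %prev, <vscale x 1 x i64> %cur, i32 -1)
  %vl = call i32 @llvm.vscale.i32()   ; VL of <vscale x 1 x i64> is vscale
  %last = sub i32 %vl, 1
  %res = extractelement <vscale x 1 x i64> %splice, i32 %last
  ret i64 %res
}

declare <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i32)
declare i32 @llvm.vscale.i32()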