diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index c9d603612aece..fa7907c428871 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -524,7 +524,10 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { // First create the compare. Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true); Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true); - Value *Cond = Builder.CreateICmpEQ(IV, TC); + // Use ICMP_UGE so that SCEV can analyse the bound of loop count for + // scalable VF. + Value *Cond = Builder.CreateICmp( + State.VF.isScalable() ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_EQ, IV, TC); // Now create the branch. auto *Plan = getParent()->getPlan(); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 78452a9c884ee..d18add49b38a8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -709,7 +709,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; DEFAULT-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0 ; DEFAULT-NEXT: call void @llvm.masked.store.nxv4i32.p0( zeroinitializer, ptr [[TMP17]], i32 4, [[TMP15]]), !alias.scope [[META20:![0-9]+]], !noalias [[META21:![0-9]+]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index bce2d6c14d866..cfcfed7394f6c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -60,7 +60,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: store zeroinitializer, ptr [[TMP36]], align 8 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP39]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll index 8c50d86489c9d..f0e6ab86b089e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -28,7 +28,7 @@ define void @f1(ptr %A) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 
[[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll index 763b3e0bc8293..b48e82693e1d7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll @@ -38,6 +38,32 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP16:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[TMP14]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi double [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret double [[RES_0_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[RES_07:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP17]] to i64 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[IDXPROM1]] +; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[ADD]] = fadd double [[RES_07]], [[TMP18]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; ; SVE-LABEL: @test( ; SVE-NEXT: entry: @@ -54,23 +80,49 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur ; SVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; SVE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SVE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 +; SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 
+; SVE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; SVE: vector.body: ; SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SVE-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer), double 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; SVE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP4]] -; SVE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; SVE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; SVE-NEXT: [[TMP7:%.*]] = sext [[WIDE_LOAD]] to -; SVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], [[TMP7]] -; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP8]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; SVE-NEXT: [[TMP9]] = fadd [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SVE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SVE-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer), double 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; SVE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; SVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP6]] +; SVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; SVE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; SVE-NEXT: [[TMP9:%.*]] = sext [[WIDE_LOAD]] to +; SVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], [[TMP9]] +; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP10]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; SVE-NEXT: [[TMP11]] = fadd [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SVE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SVE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; SVE: middle.block: +; SVE-NEXT: [[TMP13:%.*]] = call double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, [[TMP11]]) +; SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; SVE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; SVE: scalar.ph: +; SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; SVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] +; SVE-NEXT: br label [[FOR_BODY:%.*]] +; SVE: for.cond.cleanup.loopexit: +; SVE-NEXT: [[ADD_LCSSA:%.*]] = phi double [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; SVE-NEXT: br label [[FOR_COND_CLEANUP]] +; SVE: for.cond.cleanup: +; SVE-NEXT: [[RES_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; SVE-NEXT: ret double [[RES_0_LCSSA]] +; SVE: for.body: +; SVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; SVE-NEXT: [[RES_07:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ 
[[ADD]], [[FOR_BODY]] ] +; SVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[INDVARS_IV]] +; SVE-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; SVE-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP14]] to i64 +; SVE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[IDXPROM1]] +; SVE-NEXT: [[TMP15:%.*]] = load double, ptr [[ARRAYIDX2]], align 8 +; SVE-NEXT: [[ADD]] = fadd double [[RES_07]], [[TMP15]] +; SVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; SVE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; SVE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: %cmp6 = icmp sgt i32 %size, 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index edba5ee1d7f9e..f26d533e57e45 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -76,7 +76,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: store [[TMP36]], ptr [[TMP40]], align 1 ; DEFAULT-NEXT: store [[TMP37]], ptr [[TMP43]], align 1 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] -; DEFAULT-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: [[TMP44:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] @@ -115,7 +115,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[TMP61]], i32 0 ; DEFAULT-NEXT: store [[TMP60]], ptr [[TMP62]], align 1 ; DEFAULT-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], [[TMP50]] -; DEFAULT-NEXT: [[TMP63:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC6]] +; DEFAULT-NEXT: [[TMP63:%.*]] = icmp uge i64 [[INDEX_NEXT12]], [[N_VEC6]] ; DEFAULT-NEXT: br i1 [[TMP63]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; DEFAULT: vec.epilog.middle.block: ; DEFAULT-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index f467f3cf262d2..433daedc5ce7a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -31,7 +31,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[TMP7]], ptr [[TMP8]], align 8 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -169,7 +169,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[PREDPHI]], ptr [[TMP10]], align 8 
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; TFNONE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -340,7 +340,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[PREDPHI]], ptr [[TMP11]], align 8 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -516,7 +516,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[TMP7]], ptr [[TMP8]], align 8 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -574,7 +574,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFFALLBACK-NEXT: store [[TMP7]], ptr [[TMP8]], align 8 ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFFALLBACK: scalar.ph: ; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[VECTOR_BODY]] ] @@ -652,7 +652,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[TMP7]], ptr [[TMP8]], align 8 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -823,7 +823,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFNONE-NEXT: store [[TMP9]], ptr [[TMP10]], align 8 ; TFNONE-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP7]]) ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 
[[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index 736be4995c575..4d968980833b7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -18,41 +18,41 @@ define void @foo() { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[OUTER_LOOP_LATCH4:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[OUTER_LOOP_LATCH4]] ] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_LATCH:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_LATCH]] ] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, [[VEC_IND]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; CHECK-NEXT: br label [[INNER_LOOP1:%.*]] ; CHECK: inner_loop1: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP13:%.*]], [[INNER_LOOP1]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] 
], [ [[TMP12:%.*]], [[INNER_LOOP1]] ] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, [[VEC_PHI]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP12]] = fmul [[VEC_PHI2]], [[WIDE_MASKED_GATHER3]] -; CHECK-NEXT: [[TMP13]] = add nuw nsw [[VEC_PHI]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq [[TMP13]], shufflevector ( insertelement ( poison, i64 512, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 -; CHECK-NEXT: br i1 [[TMP15]], label [[OUTER_LOOP_LATCH4]], label [[INNER_LOOP1]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP15:%.*]], [[INNER_LOOP1]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP14:%.*]], [[INNER_LOOP1]] ] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, [[VEC_PHI]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP14]] = fmul [[VEC_PHI2]], [[WIDE_MASKED_GATHER3]] +; CHECK-NEXT: [[TMP15]] = add nuw nsw [[VEC_PHI]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq [[TMP15]], shufflevector ( insertelement ( poison, i64 512, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 +; CHECK-NEXT: br i1 [[TMP17]], label [[VECTOR_LATCH]], label [[INNER_LOOP1]] ; CHECK: vector.latch: -; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi [ [[TMP12]], [[INNER_LOOP1]] ] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI5]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi [ [[TMP14]], [[INNER_LOOP1]] ] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI4]], [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index 9bb9417398526..ffe98b66569c9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -39,7 +39,7 @@ define void @test_invar_gep(ptr %dst) #0 { ; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP9]], i32 [[TMP17]] ; CHECK-NEXT: store i64 [[TMP18]], 
ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, [[N_VEC]] @@ -146,15 +146,15 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[INDEX2]], 0 -; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[INDEX2]], 1 -; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[INDEX2]], 2 -; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[INDEX2]], 3 -; CHECK-NEXT: [[TMP57:%.*]] = add i64 [[INDEX2]], 4 -; CHECK-NEXT: [[TMP58:%.*]] = add i64 [[INDEX2]], 5 -; CHECK-NEXT: [[TMP59:%.*]] = add i64 [[INDEX2]], 6 -; CHECK-NEXT: [[TMP60:%.*]] = add i64 [[INDEX2]], 7 +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[INDEX1]], 1 +; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[INDEX1]], 2 +; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[INDEX1]], 3 +; CHECK-NEXT: [[TMP57:%.*]] = add i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP58:%.*]] = add i64 [[INDEX1]], 5 +; CHECK-NEXT: [[TMP59:%.*]] = add i64 [[INDEX1]], 6 +; CHECK-NEXT: [[TMP60:%.*]] = add i64 [[INDEX1]], 7 ; CHECK-NEXT: [[TMP61:%.*]] = sub nsw i64 [[N]], [[TMP53]] ; CHECK-NEXT: [[TMP62:%.*]] = sub nsw i64 [[N]], [[TMP54]] ; CHECK-NEXT: [[TMP63:%.*]] = sub nsw i64 [[N]], [[TMP55]] @@ -176,8 +176,8 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]] ; CHECK-NEXT: [[TMP80:%.*]] = extractelement <8 x i8> [[TMP77]], i32 7 ; CHECK-NEXT: store i8 [[TMP80]], ptr [[TMP79]], align 1 -; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 -; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1000 +; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 +; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1000 ; CHECK-NEXT: br i1 [[TMP81]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 95588d1176dcd..512bfe85c1921 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -95,7 +95,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; DEFAULT-NEXT: [[TMP57]] = or [[TMP55]], [[WIDE_MASKED_GATHER11]] ; DEFAULT-NEXT: [[TMP58]] = or [[TMP56]], [[WIDE_MASKED_GATHER12]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; DEFAULT-NEXT: [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: [[TMP59:%.*]] = icmp uge i64 
[[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP59]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: [[BIN_RDX:%.*]] = or [[TMP58]], [[TMP57]] @@ -348,7 +348,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP21]] = or [[TMP19]], [[VEC_PHI]] ; DEFAULT-NEXT: [[TMP22]] = or [[TMP20]], [[VEC_PHI1]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; DEFAULT-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: [[TMP23:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: [[BIN_RDX:%.*]] = or [[TMP22]], [[TMP21]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index a3c108bca140b..e932919540dbe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -52,7 +52,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP19]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT2]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp uge i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll index 23160c13b821d..542a3dfc1bcea 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll @@ -32,7 +32,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP12]]) ; CHECK-NEXT: [[TMP14]] = fadd fast float [[TMP13]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -120,7 +120,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) { ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index f922873210b05..7219f7119a8e3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -54,7 +54,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 ; CHECK-UNORDERED-NEXT: [[TMP9]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-UNORDERED: middle.block: ; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[TMP9]]) @@ -101,7 +101,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 ; CHECK-ORDERED-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[WIDE_LOAD]]) ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -276,7 +276,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP38]] = fadd [[WIDE_LOAD5]], [[VEC_PHI2]] ; CHECK-UNORDERED-NEXT: [[TMP39]] = fadd [[WIDE_LOAD6]], [[VEC_PHI3]] ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: [[TMP40:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-UNORDERED: middle.block: ; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP37]], [[TMP36]] @@ -359,7 +359,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP37]], [[WIDE_LOAD2]]) ; CHECK-ORDERED-NEXT: [[TMP39]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP38]], [[WIDE_LOAD3]]) ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: [[TMP40:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -587,7 +587,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED-NEXT: [[TMP16]] = fadd [[TMP14]], [[VEC_PHI1]] ; CHECK-UNORDERED-NEXT: [[TMP17]] = fadd [[TMP15]], [[VEC_PHI]] ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; 
CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-UNORDERED: middle.block: ; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP17]]) @@ -657,7 +657,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) ; CHECK-ORDERED-NEXT: [[TMP15]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP12]]) ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] @@ -859,7 +859,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = fadd [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-UNORDERED-NEXT: [[TMP13]] = fadd [[VEC_PHI]], [[TMP12]] ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-UNORDERED: middle.block: ; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP13]]) @@ -921,7 +921,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = fadd [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-ORDERED-NEXT: [[TMP13]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP12]]) ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -1102,7 +1102,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-UNORDERED-NEXT: [[PREDPHI:%.*]] = select [[TMP12]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD]] ; CHECK-UNORDERED-NEXT: [[TMP13]] = fadd [[VEC_PHI]], [[PREDPHI]] ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-UNORDERED: middle.block: ; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP13]]) @@ -1163,7 +1163,7 @@ define float 
@fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-NEXT: [[PREDPHI:%.*]] = select [[TMP12]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD]] ; CHECK-ORDERED-NEXT: [[TMP13]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[PREDPHI]]) ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -1345,7 +1345,7 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b, ; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4 ; CHECK-UNORDERED-NEXT: [[TMP12]] = fadd [[TMP9]], [[WIDE_LOAD1]] ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-UNORDERED: middle.block: ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[TMP12]]) @@ -1535,7 +1535,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP52]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD5]], [[WIDE_LOAD9]], [[VEC_PHI2]]) ; CHECK-UNORDERED-NEXT: [[TMP53]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD6]], [[WIDE_LOAD10]], [[VEC_PHI3]]) ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: [[TMP54:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP54]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-UNORDERED: middle.block: ; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP51]], [[TMP50]] @@ -1642,7 +1642,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP56:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP55]], [[TMP52]]) ; CHECK-ORDERED-NEXT: [[TMP57]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP56]], [[TMP53]]) ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP58:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: [[TMP58:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP58]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -1929,7 +1929,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP52]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD5]], [[WIDE_LOAD9]], [[VEC_PHI2]]) ; CHECK-UNORDERED-NEXT: [[TMP53]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD6]], [[WIDE_LOAD10]], [[VEC_PHI3]]) ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: [[TMP54:%.*]] = icmp uge 
i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP54]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-UNORDERED: middle.block: ; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan [[TMP51]], [[TMP50]] @@ -2036,7 +2036,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP56:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP55]], [[TMP52]]) ; CHECK-ORDERED-NEXT: [[TMP57]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP56]], [[TMP53]]) ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP58:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: [[TMP58:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP58]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index ab4e7e6182262..bbf0ceff6ec57 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -33,7 +33,7 @@ define void @cost_store_i8(ptr %dst) #0 { ; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0 ; DEFAULT-NEXT: store zeroinitializer, ptr [[TMP10]], align 1 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 101, [[N_VEC]] @@ -60,7 +60,7 @@ define void @cost_store_i8(ptr %dst) #0 { ; DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0 ; DEFAULT-NEXT: store zeroinitializer, ptr [[TMP20]], align 1 ; DEFAULT-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP17]] -; DEFAULT-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] +; DEFAULT-NEXT: [[TMP21:%.*]] = icmp uge i64 [[INDEX_NEXT6]], [[N_VEC3]] ; DEFAULT-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; DEFAULT: vec.epilog.middle.block: ; DEFAULT-NEXT: [[CMP_N4:%.*]] = icmp eq i64 101, [[N_VEC3]] @@ -212,7 +212,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0 ; DEFAULT-NEXT: store [[TMP25]], ptr [[TMP27]], align 1, !alias.scope [[META14:![0-9]+]], !noalias [[META11]] ; DEFAULT-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], [[TMP20]] -; DEFAULT-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC]] +; DEFAULT-NEXT: [[TMP28:%.*]] = icmp uge i64 [[INDEX_NEXT8]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; DEFAULT: vec.epilog.middle.block: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll index 6f62f2f2096f1..76020bab7d1c5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll @@ -28,8 +28,8 @@ define void @cond_inv_load_i32i32i16(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP7]], ptr [[TMP8]], i32 4, [[TMP6]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -39,12 +39,12 @@ define void @cond_inv_load_i32i32i16(ptr noalias nocapture %a, ptr noalias nocap ; CHECK: for.body: ; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[I_07]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP10]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP9]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[INV]], align 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[INV]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_07]] ; CHECK-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] @@ -106,8 +106,8 @@ define void @cond_inv_load_f64f64f64(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP7]], i32 8, [[TMP6]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -117,13 +117,13 @@ define void @cond_inv_load_f64f64f64(ptr noalias nocapture %a, ptr noalias nocap ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[COND]], i64 [[I_08]] -; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[TMP9]], 4.000000e-01 +; CHECK-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[TMP8]], 4.000000e-01 ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: 
if.then: -; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[INV]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[INV]], align 8 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] -; CHECK-NEXT: store double [[TMP10]], ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: store double [[TMP9]], ptr [[ARRAYIDX2]], align 8 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 @@ -186,8 +186,8 @@ define void @invariant_load_cond(ptr noalias nocapture %a, ptr nocapture readonl ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP9]], ptr [[TMP10]], i32 4, [[TMP7]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -197,15 +197,15 @@ define void @invariant_load_cond(ptr noalias nocapture %a, ptr nocapture readonl ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP12]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP11]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 168 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll index 4cfc00b99a2b9..84497cf7394a5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll @@ -19,36 +19,36 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; 
CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 8 -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 8 -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64( [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP17]] = and i64 [[TMP16]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64( [[WIDE_LOAD3]]) -; CHECK-NEXT: [[TMP19]] = and i64 [[TMP18]], [[VEC_PHI2]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 8 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP17]], align 8 +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64( [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP19]] = and i64 [[TMP18]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64( [[WIDE_LOAD3]]) +; CHECK-NEXT: [[TMP21]] = and i64 [[TMP20]], [[VEC_PHI2]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = and i64 [[TMP19]], [[TMP17]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = and i64 [[TMP21]], [[TMP19]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp 
eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
@@ -62,20 +62,20 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP27:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX7]], 0
+; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP27:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX6]], 0
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP23]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x i64>, ptr [[TMP25]], align 8
-; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> [[WIDE_LOAD9]])
-; CHECK-NEXT: [[TMP27]] = and i64 [[TMP26]], [[VEC_PHI8]]
-; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2
-; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]]
+; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x i64>, ptr [[TMP25]], align 8
+; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> [[WIDE_LOAD8]])
+; CHECK-NEXT: [[TMP27]] = and i64 [[TMP26]], [[VEC_PHI7]]
+; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]
-; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]
+; CHECK-NEXT: br i1 [[CMP_N10]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1, [[ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
index 4abf5e13eeeef..b48bb597d1cba 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
@@ -19,34 +19,34 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i64 5, i32 0), [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 8
-; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 8
-; CHECK-NEXT: [[TMP16]] = add [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP17]] = add [[WIDE_LOAD3]], [[VEC_PHI2]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i64 5, i32 0), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 8
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP17]], align 8
+; CHECK-NEXT: [[TMP18]] = add [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP19]] = add [[WIDE_LOAD3]], [[VEC_PHI2]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP20:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[BIN_RDX:%.*]] = add [[TMP17]], [[TMP16]]
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add [[TMP19]], [[TMP18]]
; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[BIN_RDX]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -62,20 +62,20 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64>
zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <2 x i64> [ [[TMP22]], [[VEC_EPILOG_PH]] ], [ [[TMP26:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX7]], 0 +; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <2 x i64> [ [[TMP22]], [[VEC_EPILOG_PH]] ], [ [[TMP26:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX6]], 0 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP23]] ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x i64>, ptr [[TMP25]], align 8 -; CHECK-NEXT: [[TMP26]] = add <2 x i64> [[WIDE_LOAD9]], [[VEC_PHI8]] -; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2 -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]] +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x i64>, ptr [[TMP25]], align 8 +; CHECK-NEXT: [[TMP26]] = add <2 x i64> [[WIDE_LOAD8]], [[VEC_PHI7]] +; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP26]]) -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N10]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP28]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 5, [[ITER_CHECK]] ], [ [[TMP21]], [[VEC_EPILOG_ITER_CHECK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll index c255ff31de8d7..e8f4cd9da3b5b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll @@ -42,7 +42,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[TMP18]], [[WIDE_LOAD2]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -58,19 +58,19 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, 
i64 %n) { ; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi float [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP24:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX6]], 0 +; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi float [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP24:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX5]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP23]], align 4 -; CHECK-NEXT: [[TMP24]] = call float @llvm.vector.reduce.fadd.v2f32(float [[VEC_PHI7]], <2 x float> [[WIDE_LOAD8]]) -; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 -; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x float>, ptr [[TMP23]], align 4 +; CHECK-NEXT: [[TMP24]] = call float @llvm.vector.reduce.fadd.v2f32(float [[VEC_PHI6]], <2 x float> [[WIDE_LOAD7]]) +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 2 +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[CMP_N9]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 0ebca14926989..481bd20549b21 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -55,7 +55,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP16]], align 1 ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -76,17 +76,17 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: 
[[TMP26:%.*]] = mul i64 [[TMP25]], 8 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX4]], 0 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 0 ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP29]], align 1 -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP26]] -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] +; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], [[TMP26]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp uge i64 [[INDEX_NEXT5]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 1024, [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 1024, [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -133,7 +133,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 ; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF8: middle.block: ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -233,7 +233,7 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -302,7 +302,7 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 ; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 
1 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-VF8: middle.block: ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -383,48 +383,48 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 1 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP14]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 16 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP14]], i64 [[TMP18]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP16]], align 1 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 16 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP16]] +; CHECK-NEXT: store zeroinitializer, ptr [[TMP14]], align 1 +; CHECK-NEXT: store zeroinitializer, ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 10000, [[N_VEC]] -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 8 -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP22]] +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8 +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP20]] ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 8 -; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 10000, [[TMP24]] +; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 8 +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 10000, [[TMP22]] ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 10000, [[N_MOD_VF2]] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC3]] -; CHECK-NEXT: [[TMP25:%.*]] = 
call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8 +; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 8 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX7]], 0 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP28]], i32 0 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP29]], align 1 -; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX7]], [[TMP26]] -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX6]], 0 +; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[NEXT_GEP7]], i32 0 +; CHECK-NEXT: store zeroinitializer, ptr [[TMP26]], align 1 +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX6]], [[TMP24]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp uge i64 [[INDEX_NEXT8]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 10000, [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N9:%.*]] = icmp eq i64 10000, [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N9]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] @@ -464,16 +464,16 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 ; CHECK-VF8-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 ; CHECK-VF8-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] -; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP6]] -; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0 -; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 16 -; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP16]] -; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP14]], align 1 -; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP17]], align 1 +; CHECK-VF8-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP6]] +; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 +; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 16 +; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP14]] +; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP12]], align 1 +; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP15]], 
align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF8-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF8-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-VF8: middle.block: ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -487,14 +487,14 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 10000 ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: -; CHECK-VF8-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX3]], 0 -; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP19]] -; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i32 0 -; CHECK-VF8-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP21]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 8 -; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 10000 -; CHECK-VF8-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF8-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP17:%.*]] = add i64 [[INDEX3]], 0 +; CHECK-VF8-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP17]] +; CHECK-VF8-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP4]], i32 0 +; CHECK-VF8-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP18]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX3]], 8 +; CHECK-VF8-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT5]], 10000 +; CHECK-VF8-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll index 08d0fb77e456d..db259dd768590 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -54,7 +54,7 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read ; CHECK-NEXT: store [[TMP16]], ptr [[TMP19]], align 2 ; CHECK-NEXT: store [[TMP17]], ptr [[TMP22]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll index 2a80a7affa4f8..6c126b7ddf4a7 100644 --- 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll @@ -25,8 +25,8 @@ define void @gather_nxv4i32_ind64(ptr noalias nocapture readonly %a, ptr noalias ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -36,11 +36,11 @@ define void @gather_nxv4i32_ind64(ptr noalias nocapture readonly %a, ptr noalias ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP10]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: store float [[TMP9]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -93,8 +93,8 @@ define void @scatter_nxv4i32_ind32(ptr noalias nocapture %a, ptr noalias nocaptu ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[TMP7]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[WIDE_LOAD]], [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -104,12 +104,12 @@ define void @scatter_nxv4i32_ind32(ptr noalias nocapture %a, ptr noalias nocaptu ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: 
[[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 -; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IDXPROM4]] -; CHECK-NEXT: store float [[TMP10]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: store float [[TMP9]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -159,8 +159,8 @@ define void @scatter_inv_nxv4i32(ptr noalias nocapture %inv, ptr noalias nocaptu ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), [[BROADCAST_SPLAT]], i32 4, [[TMP6]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -170,8 +170,8 @@ define void @scatter_inv_nxv4i32(ptr noalias nocapture %inv, ptr noalias nocaptu ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP7]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: store i32 3, ptr [[INV]], align 4 @@ -230,8 +230,8 @@ define void @gather_inv_nxv4i32(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP6]], poison) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP5]], i32 4, [[TMP6]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 
@@ -241,12 +241,12 @@ define void @gather_inv_nxv4i32(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[TMP7]], 3 ; CHECK-NEXT: br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[INV]], align 4 -; CHECK-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[INV]], align 4 +; CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -318,8 +318,8 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias ; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -330,9 +330,9 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX_IDX:%.*]] = shl i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[ARRAYIDX_IDX]] -; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP16]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: store float [[TMP15]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll index 812af1a102083..2e29784dcd2fd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll @@ -54,7 +54,7 @@ define void @induction_i7(ptr %dst) #0 { ; CHECK-NEXT: store [[TMP24]], ptr [[TMP28]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; 
CHECK-NEXT: [[TMP29:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP29]], label %middle.block, label %[[VECTOR_BODY]] ; @@ -126,7 +126,7 @@ define void @induction_i3_zext(ptr %dst) #0 { ; CHECK-NEXT: store [[TMP20]], ptr [[TMP26]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP27]], label %middle.block, label %[[VECTOR_BODY]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll index 965c71c008aa1..9803f21aa3b23 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll @@ -41,8 +41,8 @@ define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP11]], i32 4, [[TMP9]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -56,9 +56,9 @@ define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_08]] -; CHECK-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index d6794420c403f..9f4f28cc89181 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -49,7 +49,7 @@ define void @test_array_load2_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP6]], [[TMP7]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[INDEX_NEXT]], 511 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -138,7 +138,7 @@ define void 
@test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[INDEX_NEXT]], 511 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -230,7 +230,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( [[TMP14]], [[TMP15]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[INDEX_NEXT]], 511 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -309,7 +309,7 @@ define i32 @test_struct_load6(ptr %S) #1 { ; CHECK-NEXT: [[TMP16]] = sub [[TMP12]], [[TMP15]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: [[TMP17:%.*]] = icmp ugt i64 [[INDEX_NEXT]], 1023 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]]) @@ -420,7 +420,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP19]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: [[TMP20:%.*]] = icmp ugt i64 [[INDEX_NEXT]], 1023 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -490,8 +490,8 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP7]] ; CHECK-NEXT: store [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -576,8 +576,8 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP15]] ; CHECK-NEXT: store [[TMP14]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 
[[INDEX]], [[TMP11]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -663,7 +663,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP7]], [[TMP6]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[INDEX_NEXT]], 1023 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -731,7 +731,7 @@ define void @mixed_load2_store2(ptr noalias nocapture readonly %A, ptr noalias n ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP6]], [[TMP9]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 +; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[INDEX_NEXT]], 511 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -818,7 +818,7 @@ define void @int_float_struct(ptr nocapture readonly %p) #0 { ; CHECK-NEXT: [[TMP6]] = add [[TMP3]], [[VEC_PHI1]] ; CHECK-NEXT: [[TMP7]] = fadd fast [[VEC_PHI]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[INDEX_NEXT]], 1023 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP7]]) @@ -915,8 +915,8 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP15]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1004,28 +1004,28 @@ define i32 @PR27626_1(ptr %p, i64 %n) #1 { ; CHECK-NEXT: [[TMP17]] = add [[TMP16]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; 
CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP17]]) +; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP17]]) ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP21:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP20:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0 ; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[P_I_X]], align 4 -; CHECK-NEXT: store i32 [[TMP20]], ptr [[P_I_Y]], align 4 -; CHECK-NEXT: [[TMP21]] = add nsw i32 [[TMP20]], [[S]] +; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[P_I_X]], align 4 +; CHECK-NEXT: store i32 [[TMP19]], ptr [[P_I_Y]], align 4 +; CHECK-NEXT: [[TMP20]] = add nsw i32 [[TMP19]], [[S]] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 [[TMP21]] +; CHECK-NEXT: ret i32 [[TMP20]] ; entry: br label %for.body @@ -1097,8 +1097,8 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP15]], [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1110,8 +1110,8 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[P_I_MINUS_1_X:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 -8 ; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1 ; CHECK-NEXT: store i32 [[Z]], ptr [[P_I_X]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[P_I_MINUS_1_X]], align 4 -; CHECK-NEXT: store i32 [[TMP17]], ptr [[P_I_Y]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[P_I_MINUS_1_X]], align 4 +; CHECK-NEXT: store i32 [[TMP16]], ptr [[P_I_Y]], align 4 ; CHECK-NEXT: 
[[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP27:![0-9]+]] @@ -1190,31 +1190,31 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP18]] = add [[TMP17]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP18]]) +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP18]]) ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP23:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP22:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_PLUS_1:%.*]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0 ; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1 ; CHECK-NEXT: [[P_I_PLUS_1_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I_PLUS_1]], i32 1 -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[P_I_X]], align 4 -; CHECK-NEXT: store i32 [[TMP21]], ptr [[P_I_PLUS_1_Y]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_I_Y]], align 4 -; CHECK-NEXT: [[TMP23]] = add nsw i32 [[TMP22]], [[S]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[P_I_X]], align 4 +; CHECK-NEXT: store i32 [[TMP20]], ptr [[P_I_PLUS_1_Y]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[P_I_Y]], align 4 +; CHECK-NEXT: [[TMP22]] = add nsw i32 [[TMP21]], [[S]] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 [[TMP23]] +; CHECK-NEXT: ret i32 [[TMP22]] ; entry: br label %for.body @@ -1295,8 +1295,8 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop 
[[LOOP30:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1392,8 +1392,8 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT4]], [[TMP15]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1403,10 +1403,10 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[A_I:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]] +; CHECK-NEXT: [[A_I_MINUS_1:%.*]] = getelementptr i8, ptr [[TMP18]], i64 -4 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]] -; CHECK-NEXT: [[A_I_MINUS_1:%.*]] = getelementptr i8, ptr [[TMP19]], i64 -4 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]] -; CHECK-NEXT: [[A_I_MINUS_3:%.*]] = getelementptr i8, ptr [[TMP20]], i64 -12 +; CHECK-NEXT: [[A_I_MINUS_3:%.*]] = getelementptr i8, ptr [[TMP19]], i64 -12 ; CHECK-NEXT: store i32 [[X]], ptr [[A_I_MINUS_1]], align 4 ; CHECK-NEXT: store i32 [[Y]], ptr [[A_I_MINUS_3]], align 4 ; CHECK-NEXT: store i32 [[Z]], ptr [[A_I]], align 4 @@ -1506,13 +1506,13 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: store [[TMP27]], ptr [[TMP28]], align 4, !alias.scope [[META37:![0-9]+]], !noalias [[META34]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP39:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 2 -; CHECK-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP31]], -1 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement [[WIDE_MASKED_GATHER4]], i32 [[TMP32]] +; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP30:%.*]] = shl nuw nsw i32 [[TMP29]], 2 +; CHECK-NEXT: [[TMP31:%.*]] = add nsw i32 [[TMP30]], -1 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement [[WIDE_MASKED_GATHER4]], i32 [[TMP31]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1521,10 +1521,10 @@ define void @PR34743(ptr %a, ptr %b, 
i64 %n) #1 { ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP32:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV2:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[I1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP32]] to i32 ; CHECK-NEXT: [[I1]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[IV1:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[IV2]] = add nuw nsw i64 [[IV]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 726d98f4d37d3..5baef43d461fb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -35,41 +35,41 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP3]] ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = shl i32 [[TMP19]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv16i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP6]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl i32 [[TMP7]], 4 +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP8]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl i32 [[INDEX]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP9]] -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call 
@llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; SCALAR_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv32i8.p0(ptr [[TMP10]], i32 1, [[INTERLEAVED_MASK]], poison) +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = shl i32 [[INDEX]], 1 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP11]] +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP9]], [[TMP9]]) +; SCALAR_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv32i8.p0(ptr [[TMP12]], i32 1, [[INTERLEAVED_MASK]], poison) ; SCALAR_TAIL_FOLDING-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv32i8( [[WIDE_MASKED_VEC]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = or disjoint i32 [[TMP8]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = call @llvm.smax.nxv16i8( [[TMP11]], [[TMP12]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = sub zeroinitializer, [[TMP14]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sext i32 [[TMP13]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP16]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i64 -1 -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP14]], [[TMP15]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP18]], i32 1, [[INTERLEAVED_MASK1]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP20]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = or disjoint i32 [[TMP10]], 1 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = call @llvm.smax.nxv16i8( [[TMP13]], [[TMP14]]) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = sub zeroinitializer, [[TMP16]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = sext i32 [[TMP15]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP18]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 -1 +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP16]], [[TMP17]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP9]], [[TMP9]]) +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP20]], i32 1, [[INTERLEAVED_MASK1]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; SCALAR_TAIL_FOLDING-NEXT: [[DOTNOT:%.*]] = icmp ult i32 
[[INDEX_NEXT]], [[N_VEC]] +; SCALAR_TAIL_FOLDING-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -82,20 +82,20 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; SCALAR_TAIL_FOLDING: if.then: ; SCALAR_TAIL_FOLDING-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP22:%.*]] = zext nneg i32 [[MUL]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP22]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = zext nneg i32 [[MUL]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP21]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP22:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP24:%.*]] = zext nneg i32 [[ADD]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP24]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP25:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[SPEC_SELECT_I:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP23]], i8 [[TMP25]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP26:%.*]] = zext nneg i32 [[MUL]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP26]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = zext nneg i32 [[ADD]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP23]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP24:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; SCALAR_TAIL_FOLDING-NEXT: [[SPEC_SELECT_I:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP22]], i8 [[TMP24]]) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP25:%.*]] = zext nneg i32 [[MUL]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP25]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 [[SPEC_SELECT_I]], ptr [[ARRAYIDX6]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP27:%.*]] = zext nneg i32 [[ADD]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP27]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP26:%.*]] = zext nneg i32 [[ADD]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP26]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 [[SUB]], ptr [[ARRAYIDX11]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_INC]] ; SCALAR_TAIL_FOLDING: for.inc: @@ -111,16 +111,16 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; PREDICATED_TAIL_FOLDING: vector.ph: ; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD]] to i32 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = shl i32 [[TMP19]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 4 -; 
PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP1]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv16i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], 4 +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP7]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -128,28 +128,28 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl i32 [[INDEX]], 1 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP8]] -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP10]], [[TMP10]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv32i8.p0(ptr [[TMP9]], i32 1, [[INTERLEAVED_MASK]], poison) +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP8]], zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = shl i32 [[INDEX]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP11]] +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP9]], [[TMP9]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv32i8.p0(ptr [[TMP12]], i32 1, [[INTERLEAVED_MASK]], poison) ; 
PREDICATED_TAIL_FOLDING-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv32i8( [[WIDE_MASKED_VEC]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = or disjoint i32 [[TMP7]], 1 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = call @llvm.smax.nxv16i8( [[TMP11]], [[TMP12]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = sub zeroinitializer, [[TMP14]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sext i32 [[TMP13]] to i64 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP16]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i64 -1 -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP14]], [[TMP15]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP10]], [[TMP10]]) -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP18]], i32 1, [[INTERLEAVED_MASK1]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP20]] -; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP2]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = or disjoint i32 [[TMP10]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = call @llvm.smax.nxv16i8( [[TMP13]], [[TMP14]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = sub zeroinitializer, [[TMP16]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = sext i32 [[TMP15]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP18]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 -1 +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP16]], [[TMP17]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP9]], [[TMP9]]) +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP20]], i32 1, [[INTERLEAVED_MASK1]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] +; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP21]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] @@ -227,32 +227,32 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP3]] ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = shl i32 [[TMP14]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv16i32() -; SCALAR_TAIL_FOLDING-NEXT: 
[[TMP5:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP6]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl i32 [[TMP7]], 4 +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP8]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = zext nneg [[TMP7]] to -; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP8]] -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP9]], i32 1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = or disjoint [[TMP7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = zext nneg [[TMP11]] to -; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP12]] -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP13]], i32 1, [[TMP10]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP15]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = zext nneg [[TMP9]] to +; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP10]] +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP11]], i32 1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = or disjoint [[TMP9]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = zext nneg [[TMP13]] to +; 
SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP14]] +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP15]], i32 1, [[TMP12]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SCALAR_TAIL_FOLDING-NEXT: [[DOTNOT:%.*]] = icmp ult i32 [[INDEX_NEXT]], [[N_VEC]] +; SCALAR_TAIL_FOLDING-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -262,15 +262,15 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING: for.body: ; SCALAR_TAIL_FOLDING-NEXT: [[IX_012:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_012]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = zext nneg i32 [[MUL]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP17]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = zext nneg i32 [[MUL]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP16]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 1, ptr [[ARRAYIDX]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_012]], [[CONV]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; SCALAR_TAIL_FOLDING: if.then: ; SCALAR_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = zext nneg i32 [[ADD]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP18]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = zext nneg i32 [[ADD]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP17]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 2, ptr [[ARRAYIDX3]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_INC]] ; SCALAR_TAIL_FOLDING: for.inc: @@ -286,16 +286,16 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; PREDICATED_TAIL_FOLDING: vector.ph: ; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD]] to i32 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = shl i32 [[TMP14]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 4 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP1]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: 
[[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv16i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], 4 +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP7]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -303,19 +303,19 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = zext nneg [[TMP6]] to -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP7]] -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP8]], i32 1, [[ACTIVE_LANE_MASK]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = or disjoint [[TMP6]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = zext nneg [[TMP10]] to -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP11]] -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP12]], i32 1, [[TMP13]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP15]] -; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP2]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext nneg [[TMP8]] to +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP9]] +; 
PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = or disjoint [[TMP8]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = zext nneg [[TMP13]] to +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP14]] +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP15]], i32 1, [[TMP12]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] +; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] @@ -389,12 +389,12 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP3]] ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = shl i32 [[TMP15]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv16i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP6]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl i32 [[TMP7]], 4 +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP8]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -403,21 +403,21 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, 
zeroinitializer) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext nneg [[TMP7]] to -; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP9]] -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP10]], i32 1, [[TMP8]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT2]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = or disjoint [[TMP7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = zext nneg [[TMP12]] to -; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP13]] -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP14]], i32 1, [[TMP11]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP16]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = zext nneg [[TMP9]] to +; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP11]] +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP12]], i32 1, [[TMP10]]) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT2]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = or disjoint [[TMP9]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = zext nneg [[TMP14]] to +; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP15]] +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP16]], i32 1, [[TMP13]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; SCALAR_TAIL_FOLDING-NEXT: [[DOTNOT:%.*]] = icmp ult i32 [[INDEX_NEXT]], [[N_VEC]] +; SCALAR_TAIL_FOLDING-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -430,8 +430,8 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_018]], [[CONV]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; SCALAR_TAIL_FOLDING: if.then: -; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = zext nneg i32 [[MUL]] to i64 -; 
SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP18]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = zext nneg i32 [[MUL]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP17]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 1, ptr [[ARRAYIDX]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: br label [[IF_END]] ; SCALAR_TAIL_FOLDING: if.end: @@ -439,8 +439,8 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP4]], label [[IF_THEN6:%.*]], label [[FOR_INC]] ; SCALAR_TAIL_FOLDING: if.then6: ; SCALAR_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = zext nneg i32 [[ADD]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP19]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = zext nneg i32 [[ADD]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP18]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 2, ptr [[ARRAYIDX7]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_INC]] ; SCALAR_TAIL_FOLDING: for.inc: @@ -457,16 +457,16 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING: vector.ph: ; PREDICATED_TAIL_FOLDING-NEXT: [[CONV3:%.*]] = zext i8 [[GUARD2]] to i32 ; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD1]] to i32 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = shl i32 [[TMP16]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 4 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP1]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv16i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], 4 +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP7]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -476,21 +476,21 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; 
PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP7]], zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = zext nneg [[TMP6]] to -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP8]] -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP9]], i32 1, [[TMP10]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT2]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = or disjoint [[TMP6]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = zext nneg [[TMP12]] to -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP13]] -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP14]], i32 1, [[TMP15]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP17]] -; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP2]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = zext nneg [[TMP8]] to +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP11]] +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP12]], i32 1, [[TMP10]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT2]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = or disjoint [[TMP8]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = zext nneg [[TMP15]] to +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP16]] +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP17]], i32 1, [[TMP14]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] +; PREDICATED_TAIL_FOLDING-NEXT: 
[[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll index 08da1dedae23e..b568bef7adb14 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll @@ -30,7 +30,7 @@ define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement [[WIDE_LOAD]], i32 [[TMP11]] ; CHECK-NEXT: store i16 [[TMP12]], ptr [[DST:%.*]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -92,7 +92,7 @@ define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64 ; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_LOAD]], [[BROADCAST_SPLAT]], i32 4, [[TMP9]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll index 40e93f37bbd76..71dc63e45ebbf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -29,27 +29,27 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 2 -; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], 0 -; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8 -; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], [[TMP28]] -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[TMP30]], i32 0 -; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 2 -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP30]], i64 [[TMP34]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP32]], align 8 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP35]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = call i64 
@llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 8 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], [[TMP16]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 [[TMP20]] +; CHECK-NEXT: store zeroinitializer, ptr [[TMP18]], align 8 +; CHECK-NEXT: store zeroinitializer, ptr [[TMP21]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP37:%.*]] = mul i64 [[CMO]], 8 -; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP37]] +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[CMO]], 8 +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP23]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll index b3a2414693ecc..d31001f92ac64 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll @@ -61,7 +61,7 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP27]], align 4 ; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] -; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp uge i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] @@ -159,7 +159,7 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP27]], align 4 ; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] -; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp uge i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll index 98081e47b234a..caa102f57dd71 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -47,53 +47,53 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP16]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[TMP49]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 -; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], 0 -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i64, ptr [[TMP23]], i32 0 -; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 2 -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP23]], i64 [[TMP29]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP27]], align 8 -; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load , ptr [[TMP30]], align 8 -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[TMP25]], i32 0 -; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 2 -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP25]], i64 [[TMP33]] -; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load , ptr [[TMP31]], align 8 -; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load , ptr [[TMP34]], align 8 -; CHECK-NEXT: [[TMP35:%.*]] = add [[WIDE_LOAD]], [[WIDE_LOAD13]] -; CHECK-NEXT: [[TMP36:%.*]] = add [[WIDE_LOAD12]], [[WIDE_LOAD14]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i64, ptr [[TMP37]], i32 0 -; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 2 -; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i64, ptr [[TMP37]], i64 [[TMP43]] -; CHECK-NEXT: store [[TMP35]], ptr [[TMP41]], align 8 -; CHECK-NEXT: store [[TMP36]], ptr [[TMP44]], align 8 -; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i64, ptr [[TMP39]], i32 0 -; CHECK-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP47:%.*]] = mul i64 [[TMP46]], 2 -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[TMP39]], i64 [[TMP47]] -; CHECK-NEXT: store [[TMP35]], ptr [[TMP45]], align 8 -; CHECK-NEXT: store [[TMP36]], ptr [[TMP48]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP50]] -; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 
0 +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[TMP25]], i32 0 +; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 2 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[TMP25]], i64 [[TMP31]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP29]], align 8 +; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load , ptr [[TMP32]], align 8 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i64, ptr [[TMP27]], i32 0 +; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 2 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i64, ptr [[TMP27]], i64 [[TMP35]] +; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load , ptr [[TMP33]], align 8 +; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load , ptr [[TMP36]], align 8 +; CHECK-NEXT: [[TMP37:%.*]] = add [[WIDE_LOAD]], [[WIDE_LOAD13]] +; CHECK-NEXT: [[TMP38:%.*]] = add [[WIDE_LOAD12]], [[WIDE_LOAD14]] +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i64, ptr [[TMP39]], i32 0 +; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 2 +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i64, ptr [[TMP39]], i64 [[TMP45]] +; CHECK-NEXT: store [[TMP37]], ptr [[TMP43]], align 8 +; CHECK-NEXT: store [[TMP38]], ptr [[TMP46]], align 8 +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i64, ptr [[TMP41]], i32 0 +; CHECK-NEXT: [[TMP48:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP49:%.*]] = mul i64 [[TMP48]], 2 +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i64, ptr [[TMP41]], i64 [[TMP49]] +; CHECK-NEXT: store [[TMP37]], ptr [[TMP47]], align 8 +; CHECK-NEXT: store [[TMP38]], ptr [[TMP50]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 8500f5fc194c6..c6372250ede67 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -779,7 +779,7 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 0 ; CHECK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 
[[INDEX_NEXT2]], [[N_VEC]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index e3bba1338e1df..e100b7862a393 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -23,44 +23,44 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP31:%.*]] = shl i64 [[TMP30]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[INDEX]], -1 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], [[N]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 3 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 1, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP11]], 3 -; CHECK-NEXT: [[TMP13:%.*]] = sub i64 0, [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = sub i64 1, [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP14]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 8 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP16]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP18:%.*]] = fadd [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = shl i64 [[TMP20]], 3 -; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP19]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP25:%.*]] = shl i64 [[TMP24]], 3 -; CHECK-NEXT: [[TMP26:%.*]] = sub i64 0, [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = sub i64 1, [[TMP25]] -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, ptr [[TMP19]], i64 [[TMP26]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 [[TMP27]] -; CHECK-NEXT: store [[TMP17]], ptr [[TMP23]], align 8 -; CHECK-NEXT: store [[TMP18]], ptr [[TMP29]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP31]] -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP32]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[INDEX]], -1 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[N]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 3 +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 [[TMP16]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 8 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP18]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP20:%.*]] = fadd [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = shl i64 [[TMP22]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = sub i64 1, [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP26]], 3 +; CHECK-NEXT: [[TMP28:%.*]] = sub i64 0, [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = sub i64 1, [[TMP27]] +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP29]] +; CHECK-NEXT: store [[TMP19]], ptr [[TMP25]], align 8 +; CHECK-NEXT: store [[TMP20]], ptr [[TMP31]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -75,8 +75,8 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP33:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP33]], 1.000000e+00 +; CHECK-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP32]], 1.000000e+00 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] ; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1 @@ -125,44 +125,44 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = shl i64 
[[TMP5]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP34:%.*]] = shl i64 [[TMP33]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[INDEX]], -1 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[N]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 3 -; CHECK-NEXT: [[TMP12:%.*]] = sub i64 1, [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 3 -; CHECK-NEXT: [[TMP16:%.*]] = sub i64 0, [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = sub i64 1, [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[TMP18]], i64 [[TMP17]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP13]], align 8 -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP19]], align 8 -; CHECK-NEXT: [[TMP20:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP21:%.*]] = add [[WIDE_LOAD3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP24:%.*]] = shl i64 [[TMP23]], 3 -; CHECK-NEXT: [[TMP25:%.*]] = sub i64 1, [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP22]], i64 [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP28:%.*]] = shl i64 [[TMP27]], 3 -; CHECK-NEXT: [[TMP29:%.*]] = sub i64 0, [[TMP28]] -; CHECK-NEXT: [[TMP30:%.*]] = sub i64 1, [[TMP28]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[TMP22]], i64 [[TMP29]] -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[TMP31]], i64 [[TMP30]] -; CHECK-NEXT: store [[TMP20]], ptr [[TMP26]], align 8 -; CHECK-NEXT: store [[TMP21]], ptr [[TMP32]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]] -; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[INDEX]], -1 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], [[N]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 3 +; CHECK-NEXT: [[TMP14:%.*]] = sub i64 1, [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP16]], 3 +; CHECK-NEXT: [[TMP18:%.*]] = sub i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 
[[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i64 [[TMP19]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP15]], align 8 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP21]], align 8 +; CHECK-NEXT: [[TMP22:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP23:%.*]] = add [[WIDE_LOAD3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP26:%.*]] = shl i64 [[TMP25]], 3 +; CHECK-NEXT: [[TMP27:%.*]] = sub i64 1, [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP30:%.*]] = shl i64 [[TMP29]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = sub i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = sub i64 1, [[TMP30]] +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i64, ptr [[TMP33]], i64 [[TMP32]] +; CHECK-NEXT: store [[TMP22]], ptr [[TMP28]], align 8 +; CHECK-NEXT: store [[TMP23]], ptr [[TMP34]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -177,8 +177,8 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_09]] = add nsw i64 [[I_09_IN]], -1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_09]] -; CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP36]], 1 +; CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP35]], 1 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_09]] ; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX2]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index 3bab341e1c248..0388cc8a7d693 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -9,32 +9,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1 ; ; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count -; CHECK-NEXT: Live-in ir<%N> = original trip-count -; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: -; CHECK-NEXT: Successor(s): vector loop -; CHECK-EMPTY: -; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1 -; CHECK-NEXT: vp<[[PTR_IDX:%.+]]> = 
DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<8> -; CHECK-NEXT: vp<[[PTR_IDX_STEPS:%.+]]> = SCALAR-STEPS vp<[[PTR_IDX]]>, ir<8> -; CHECK-NEXT: EMIT vp<[[PTR_IV_1:%.+]]> = ptradd ir<%start.1>, vp<[[PTR_IDX_STEPS]]> -; CHECK-NEXT: WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr inbounds ir<%ptr.iv.2>, ir<1> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<[[PTR_IV_1]]> -; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%ptr.iv.2.next> -; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%ptr.iv.2> -; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR2]]> -; CHECK-NEXT: WIDEN ir<%add> = add ir<%lv>, ir<1> -; CHECK-NEXT: vp<[[VEC_PTR3:%.+]]> = vector-pointer ir<%ptr.iv.2> -; CHECK-NEXT: WIDEN store vp<[[VEC_PTR3]]>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> -; CHECK-NEXT: No successors -; CHECK-NEXT: } ; In the test below the pointer phi %ptr.iv.2 is used as ; 1. As a uniform address for the load, and @@ -55,8 +29,8 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[START_2:%.*]], i64 [[N_VEC]] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -73,20 +47,20 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP13]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, [[TMP14]], i64 1 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: store [[TMP15]], ptr [[TMP16]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[TMP14]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP18]], align 1 -; CHECK-NEXT: [[TMP19:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: store [[TMP19]], ptr [[TMP18]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, [[TMP14]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store [[TMP16]], ptr [[TMP17]], align 8 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP18]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), 
poison, zeroinitializer) +; CHECK-NEXT: store [[TMP20]], ptr [[TMP19]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -148,31 +122,31 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP11:%.*]] = add [[DOTSPLAT]], [[TMP10]] -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP11]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: store [[TMP15]], ptr [[TMP14]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP17]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP12:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP13:%.*]] = add [[DOTSPLAT]], [[TMP12]] +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP13]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = 
getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: store [[TMP17]], ptr [[TMP16]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP6]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index 76084776b2b76..1dda5c4b380a0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -63,8 +63,8 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt ; CHECK-NEXT: store [[TMP18]], ptr [[TMP20]], align 4 ; CHECK-NEXT: store [[TMP19]], ptr [[TMP22]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -76,13 +76,13 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt ; CHECK-NEXT: [[PTR_014:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr [[PTR_014]], i64 4 -; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[PTR_014]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[PTR_014]], align 4 ; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, ptr [[PTR_014]], i64 8 -; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_013]] ; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], 1 +; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP24]], 1 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_013]] ; CHECK-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX3]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 @@ -149,23 +149,23 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2 ; CHECK-NEXT: 
[[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]] +; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX4]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP7]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[DOTIDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[NEXT_GEP]], align 4 -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = shl nsw [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP10:%.*]] = shl nsw [[WIDE_LOAD8]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = shl nsw [[WIDE_LOAD6]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTIDX9:%.*]] = shl nuw nsw i64 [[TMP11]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP6]], i64 [[DOTIDX9]] -; CHECK-NEXT: store [[TMP9]], ptr [[NEXT_GEP6]], align 4 +; CHECK-NEXT: [[DOTIDX7:%.*]] = shl nuw nsw i64 [[TMP11]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 [[DOTIDX7]] +; CHECK-NEXT: store [[TMP9]], ptr [[NEXT_GEP5]], align 4 ; CHECK-NEXT: store [[TMP10]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -178,8 +178,8 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no ; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[S_010:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[D_09:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[S_010]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP14]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[S_010]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP13]], 1 ; CHECK-NEXT: store i32 [[MUL]], ptr [[D_09]], align 4 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[D_09]], i64 4 ; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, ptr [[S_010]], i64 4 @@ -253,17 +253,17 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: store [[TMP10]], ptr [[NEXT_GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32( [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32( [[TMP12]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ] ; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -279,7 +279,7 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[VAR5]] ; entry: @@ -325,7 +325,7 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr % ; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0( zeroinitializer, ptr [[TMP7]], i32 2, [[TMP6]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[INDEX_NEXT]], 1023 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index def912a6d72cf..fe3609c4077ca 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -22,19 +22,19 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; NONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; NONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP3]] ; NONE-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] -; NONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; NONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NONE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; NONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NONE-NEXT: br label [[VECTOR_BODY:%.*]] ; NONE: vector.body: ; NONE-NEXT: [[INDEX1:%.*]] = phi 
i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] -; NONE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX1]], 0 -; NONE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]] -; NONE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; NONE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 4 -; NONE-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP8]] -; NONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] +; NONE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX1]], 0 +; NONE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]] +; NONE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 0 +; NONE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 4 +; NONE-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP5]] +; NONE-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT2]], [[N_VEC]] ; NONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NONE: middle.block: ; NONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] @@ -63,12 +63,12 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA: vector.ph: ; DATA-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; DATA-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; DATA-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1 -; DATA-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] +; DATA-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 +; DATA-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP6]] ; DATA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; DATA-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; DATA-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; DATA-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 +; DATA-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; DATA-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; DATA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; DATA-NEXT: br label [[VECTOR_BODY:%.*]] @@ -79,9 +79,9 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]] ; DATA-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 ; DATA-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]]) -; DATA-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]] -; DATA-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] -; DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; DATA-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP8]] +; DATA-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT2]], [[N_VEC]] +; DATA-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA: middle.block: ; DATA-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; DATA: scalar.ph: @@ -108,33 +108,33 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_NO_LANEMASK: vector.ph: ; DATA_NO_LANEMASK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; DATA_NO_LANEMASK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; DATA_NO_LANEMASK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1 -; DATA_NO_LANEMASK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] +; DATA_NO_LANEMASK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 +; DATA_NO_LANEMASK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], 
[[TMP6]] ; DATA_NO_LANEMASK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; DATA_NO_LANEMASK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1 -; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 -; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 -; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; DATA_NO_LANEMASK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; DATA_NO_LANEMASK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector [[BROADCAST_SPLATINSERT4]], poison, zeroinitializer ; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_BODY:%.*]] ; DATA_NO_LANEMASK: vector.body: ; DATA_NO_LANEMASK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VECTOR_BODY]] ] ; DATA_NO_LANEMASK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0 -; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[INDEX1]], i64 0 -; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer +; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX1]], i64 0 +; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; DATA_NO_LANEMASK-NEXT: [[TMP10:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; DATA_NO_LANEMASK-NEXT: [[TMP11:%.*]] = add zeroinitializer, [[TMP10]] -; DATA_NO_LANEMASK-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT3]], [[TMP11]] -; DATA_NO_LANEMASK-NEXT: [[TMP12:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT]] +; DATA_NO_LANEMASK-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP11]] +; DATA_NO_LANEMASK-NEXT: [[TMP12:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT3]] ; DATA_NO_LANEMASK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]] ; DATA_NO_LANEMASK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 ; DATA_NO_LANEMASK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT5]], ptr [[TMP14]], i32 4, [[TMP12]]) -; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP16]] -; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] -; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP8]] +; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[INDEX_NEXT6]], [[N_VEC]] +; DATA_NO_LANEMASK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_NO_LANEMASK: middle.block: ; DATA_NO_LANEMASK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; DATA_NO_LANEMASK: scalar.ph: @@ -161,12 +161,12 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL: vector.ph: ; DATA_AND_CONTROL-NEXT: [[TMP4:%.*]] = call 
i64 @llvm.vscale.i64() ; DATA_AND_CONTROL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; DATA_AND_CONTROL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1 -; DATA_AND_CONTROL-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] +; DATA_AND_CONTROL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 +; DATA_AND_CONTROL-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP6]] ; DATA_AND_CONTROL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; DATA_AND_CONTROL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; DATA_AND_CONTROL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; DATA_AND_CONTROL-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 +; DATA_AND_CONTROL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; DATA_AND_CONTROL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -178,11 +178,11 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]] ; DATA_AND_CONTROL-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 ; DATA_AND_CONTROL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]]) -; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]] +; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP8]] ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]]) -; DATA_AND_CONTROL-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; DATA_AND_CONTROL-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 -; DATA_AND_CONTROL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; DATA_AND_CONTROL-NEXT: [[TMP12:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; DATA_AND_CONTROL-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 +; DATA_AND_CONTROL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL: middle.block: ; DATA_AND_CONTROL-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; DATA_AND_CONTROL: scalar.ph: @@ -205,12 +205,12 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK: vector.ph: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP2]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: 
[[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -227,11 +227,11 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL_NO_RT_CHECK: middle.block: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; DATA_AND_CONTROL_NO_RT_CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll index dec3c286345ad..aa492ede7c9fd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll @@ -44,7 +44,7 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] ; CHECK-NEXT: store [[TMP11]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] @@ -127,7 +127,7 @@ define void @sext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] ; CHECK-NEXT: store [[TMP11]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp uge i64 
[[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll index 4a2f9d07ed91c..75dd057b0b1dd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll @@ -16,19 +16,19 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 { ; WIDE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; WIDE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; WIDE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; WIDE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; WIDE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 +; WIDE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; WIDE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; WIDE-NEXT: br label [[VECTOR_BODY:%.*]] ; WIDE: vector.body: ; WIDE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; WIDE-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[INDEX]] -; WIDE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; WIDE-NEXT: [[TMP5:%.*]] = fptrunc [[WIDE_LOAD]] to -; WIDE-NEXT: [[TMP6:%.*]] = call @foo_vector( [[TMP5]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; WIDE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; WIDE-NEXT: store [[TMP6]], ptr [[TMP7]], align 4 -; WIDE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; WIDE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; WIDE-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[INDEX]] +; WIDE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; WIDE-NEXT: [[TMP7:%.*]] = fptrunc [[WIDE_LOAD]] to +; WIDE-NEXT: [[TMP8:%.*]] = call @foo_vector( [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; WIDE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; WIDE-NEXT: store [[TMP8]], ptr [[TMP9]], align 4 +; WIDE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; WIDE-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; WIDE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; WIDE: middle.block: ; WIDE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index d970b427d035d..d9816d48b0972 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -47,7 +47,7 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP20]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], 
label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32() @@ -144,7 +144,7 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( zeroinitializer, [[TMP17]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() @@ -241,7 +241,7 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( zeroinitializer, [[TMP17]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() @@ -348,7 +348,7 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP20]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32() @@ -457,7 +457,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( zeroinitializer, [[TMP17]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() @@ -574,7 +574,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP20]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP21:%.*]] = 
icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32() @@ -679,7 +679,7 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP12]], shufflevector ( insertelement ( poison, i16 99, i64 0), poison, zeroinitializer), [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP11]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() @@ -769,7 +769,7 @@ define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP13]], shufflevector ( insertelement ( poison, i16 99, i64 0), poison, zeroinitializer), [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP11]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index da5db810ec638..55cfbb5b4af5b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -52,7 +52,7 @@ define void @dead_load(ptr %p, i16 %start) { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP21]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT2]] -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] @@ -135,7 +135,7 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i8.nxv4p0( zeroinitializer, [[TMP16]], i32 1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp uge i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll index 563ee0f630775..90086eff9f1b6 100644 --- 
a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll @@ -36,7 +36,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -97,7 +97,7 @@ define i64 @vector_add_reduce(ptr noalias nocapture %a) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 ; CHECK-NEXT: [[TMP9]] = add [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP9]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index 7ca1b5395dd01..88d0dbc7c9853 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -33,7 +33,7 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = udiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -137,7 +137,7 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = sdiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -241,7 +241,7 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = urem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -345,7 +345,7 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = srem [[WIDE_LOAD]], 
[[BROADCAST_SPLAT]] ; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -452,8 +452,8 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -506,8 +506,8 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; FIXED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; FIXED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; FIXED: middle.block: ; FIXED-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXED: scalar.ph: @@ -583,8 +583,8 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -637,8 +637,8 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; FIXED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; FIXED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; FIXED: middle.block: ; FIXED-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; 
FIXED: scalar.ph: @@ -711,8 +711,8 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP10]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -761,8 +761,8 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; FIXED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; FIXED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; FIXED: middle.block: ; FIXED-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXED: scalar.ph: @@ -835,8 +835,8 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP10]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -885,8 +885,8 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; FIXED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; FIXED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; FIXED: middle.block: ; FIXED-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXED: scalar.ph: @@ -960,8 +960,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], 
label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1012,8 +1012,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: store <32 x i8> [[PREDPHI]], ptr [[TMP4]], align 1 ; FIXED-NEXT: store <32 x i8> [[PREDPHI2]], ptr [[TMP5]], align 1 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 -; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; FIXED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; FIXED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; FIXED: middle.block: ; FIXED-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXED: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 8824fa8a16b74..23a84f319b4b8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -36,7 +36,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; OUTLOOP-NEXT: [[TMP9:%.*]] = sext [[WIDE_LOAD]] to ; OUTLOOP-NEXT: [[TMP10]] = add [[VEC_PHI]], [[TMP9]] ; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp uge i32 [[INDEX_NEXT]], [[N_VEC]] ; OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OUTLOOP: middle.block: ; OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP10]]) @@ -91,7 +91,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32( [[TMP9]]) ; INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] ; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; INLOOP-NEXT: [[TMP12:%.*]] = icmp uge i32 [[INDEX_NEXT]], [[N_VEC]] ; INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; INLOOP: middle.block: ; INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -171,7 +171,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]] ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP6]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]] -; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp uge i32 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -243,7 +243,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; OUTLOOP-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_LOAD]], [[VEC_PHI]] ; OUTLOOP-NEXT: [[TMP10]] = 
select [[TMP9]], [[WIDE_LOAD]], [[VEC_PHI]] ; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; OUTLOOP: middle.block: ; OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[TMP10]]) @@ -291,7 +291,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; INLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[WIDE_LOAD]]) ; INLOOP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) ; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; INLOOP-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; INLOOP: middle.block: ; INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -314,6 +314,77 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; INLOOP-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] ; INLOOP-NEXT: ret i32 [[SMIN_LCSSA]] ; +; IF-EVL-OUTLOOP-LABEL: @smin( +; IF-EVL-OUTLOOP-NEXT: entry: +; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL-OUTLOOP: for.body: +; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ] +; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] +; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-OUTLOOP-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP0]], [[RDX]] +; IF-EVL-OUTLOOP-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP0]], i32 [[RDX]] +; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] +; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; IF-EVL-OUTLOOP: for.end: +; IF-EVL-OUTLOOP-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ] +; IF-EVL-OUTLOOP-NEXT: ret i32 [[SMIN_LCSSA]] +; +; IF-EVL-INLOOP-LABEL: @smin( +; IF-EVL-INLOOP-NEXT: entry: +; IF-EVL-INLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] +; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-INLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 +; IF-EVL-INLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] +; IF-EVL-INLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL-INLOOP: vector.ph: +; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 +; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] +; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] +; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL-INLOOP: vector.body: +; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; 
IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] +; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) +; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] +; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 +; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 2147483647, [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; IF-EVL-INLOOP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) +; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 +; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] +; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] +; IF-EVL-INLOOP-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-INLOOP-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-INLOOP: middle.block: +; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-INLOOP: scalar.ph: +; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL-INLOOP: for.body: +; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ] +; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-INLOOP-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP17]], [[RDX]] +; IF-EVL-INLOOP-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]] +; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-INLOOP: for.end: +; IF-EVL-INLOOP-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] +; IF-EVL-INLOOP-NEXT: ret i32 [[SMIN_LCSSA]] +; ; IF-EVL-LABEL: @smin( ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] @@ -375,7 +446,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL: for.end: ; IF-EVL-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret i32 [[SMIN_LCSSA]] -; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index 
87bc77cb7767f..130506884c88e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -36,7 +36,7 @@ define void @load_store_factor2_i32(ptr %p) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP12]], [[TMP15]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -142,7 +142,7 @@ define void @load_store_factor2_i32(ptr %p) { ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP12]], [[TMP15]]) ; SCALABLE-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP16]], align 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -225,7 +225,7 @@ define void @load_store_factor2_i64(ptr %p) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv4i64( [[TMP12]], [[TMP15]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -331,7 +331,7 @@ define void @load_store_factor2_i64(ptr %p) { ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv4i64( [[TMP12]], [[TMP15]]) ; SCALABLE-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP16]], align 8 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -1188,7 +1188,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 ; CHECK-NEXT: store [[TMP12]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -1296,7 +1296,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 ; SCALABLE-NEXT: store [[TMP12]], ptr [[TMP14]], 
align 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP15:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -1376,7 +1376,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i32 0 ; CHECK-NEXT: store [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -1484,7 +1484,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i32 0 ; SCALABLE-NEXT: store [[TMP12]], ptr [[TMP14]], align 8 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP15:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll index b0aefae41a1fc..f7c3e3da600da 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll @@ -15,18 +15,18 @@ define void @load_store(ptr %p) { ; LMUL1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; LMUL1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; LMUL1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; LMUL1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; LMUL1-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL1: vector.body: ; LMUL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; LMUL1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; LMUL1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP2]] -; LMUL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; LMUL1-NEXT: [[TMP5:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; LMUL1-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; LMUL1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; LMUL1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; LMUL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP3]] +; LMUL1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 +; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 8 +; LMUL1-NEXT: [[TMP6:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; LMUL1-NEXT: store [[TMP6]], ptr [[TMP5]], align 8 +; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 
[[TMP2]] +; LMUL1-NEXT: [[TMP7:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL1: middle.block: ; LMUL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -57,19 +57,19 @@ define void @load_store(ptr %p) { ; LMUL2-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; LMUL2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; LMUL2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; LMUL2-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; LMUL2-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; LMUL2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; LMUL2-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; LMUL2-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL2: vector.body: ; LMUL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; LMUL2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; LMUL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP4]] -; LMUL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; LMUL2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; LMUL2-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; LMUL2-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; LMUL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; LMUL2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; LMUL2-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; LMUL2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP6]] +; LMUL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; LMUL2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; LMUL2-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; LMUL2-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; LMUL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; LMUL2-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL2: middle.block: ; LMUL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -100,19 +100,19 @@ define void @load_store(ptr %p) { ; LMUL4-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; LMUL4-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; LMUL4-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; LMUL4-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; LMUL4-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 +; LMUL4-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; LMUL4-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; LMUL4-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL4: vector.body: ; LMUL4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; LMUL4-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; LMUL4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP4]] -; LMUL4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; LMUL4-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; LMUL4-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; LMUL4-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; LMUL4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; LMUL4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; LMUL4-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; LMUL4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr 
[[P:%.*]], i64 [[TMP6]] +; LMUL4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; LMUL4-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; LMUL4-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; LMUL4-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; LMUL4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; LMUL4-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL4-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL4: middle.block: ; LMUL4-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -143,19 +143,19 @@ define void @load_store(ptr %p) { ; LMUL8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 ; LMUL8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; LMUL8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; LMUL8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; LMUL8-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 +; LMUL8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; LMUL8-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; LMUL8-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL8: vector.body: ; LMUL8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; LMUL8-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; LMUL8-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP4]] -; LMUL8-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; LMUL8-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; LMUL8-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; LMUL8-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; LMUL8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; LMUL8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; LMUL8-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; LMUL8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP6]] +; LMUL8-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; LMUL8-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; LMUL8-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; LMUL8-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; LMUL8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; LMUL8-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL8-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL8: middle.block: ; LMUL8-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index 1307d57f32bc1..a80ab007a8bf3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -50,7 +50,7 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: store [[TMP17]], ptr [[TMP19]], align 4 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: [[TMP20:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: 
[[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index a151232df0cd5..8c33cee23e061 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -66,7 +66,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP18]], [[TMP19]], i32 8, [[TMP14]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; RV32-NEXT: [[TMP20:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; RV32-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV32: middle.block: ; RV32-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, [[N_VEC]] @@ -148,7 +148,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP18]], [[TMP19]], i32 8, [[TMP14]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; RV64-NEXT: [[TMP20:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; RV64-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV64: middle.block: ; RV64-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index 5f81416003714..e03450ddcada6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -50,14 +50,14 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP20:%.*]] = xor [[TMP14]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP13]], [[TMP20]], zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP19]], [[TMP21]] -; CHECK-NEXT: [[EXT:%.+]] = extractelement [[TMP19]], i32 0 +; CHECK-NEXT: [[EXT:%.*]] = extractelement [[TMP19]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[EXT]], i64 [[TMP12]], i64 poison ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]] ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[TMP24]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0( zeroinitializer, ptr [[TMP25]], i32 2, [[TMP22]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1001, [[N_VEC]] diff --git 
a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll index 77b0ae2f846a3..f2c7c759a3e94 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll @@ -33,7 +33,7 @@ define void @test(ptr %p) { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP11]], align 32 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]] @@ -156,7 +156,7 @@ define void @trivial_due_max_vscale(ptr %p) { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP11]], align 32 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]] @@ -222,7 +222,7 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP11]], align 32 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll index cb27dc4f2d289..2ed4c3a543bfa 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll @@ -24,21 +24,21 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; VLENUNK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], 
[[BROADCAST_SPLAT]] -; VLENUNK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; VLENUNK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; VLENUNK-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLENUNK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; VLENUNK-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -69,21 +69,21 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLEN128-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; VLEN128-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; VLEN128-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; VLEN128-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; VLEN128-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLEN128-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; VLEN128-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -135,21 +135,21 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 +; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; 
VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]] -; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; VLENUNK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; VLENUNK-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] +; VLENUNK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; VLENUNK-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLENUNK-NEXT: store [[TMP9]], ptr [[TMP8]], align 4 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; VLENUNK-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -180,21 +180,21 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { ; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 +; VLEN128-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 ; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]] -; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; VLEN128-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; VLEN128-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] +; VLEN128-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; VLEN128-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLEN128-NEXT: store [[TMP9]], ptr [[TMP8]], align 4 +; VLEN128-NEXT: 
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; VLEN128-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -301,21 +301,21 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] -; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] -; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP6]] +; VLENUNK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; VLENUNK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP9]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; VLENUNK-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -346,21 +346,21 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLEN128-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; 
VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] -; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] -; VLEN128-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP6]] +; VLEN128-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; VLEN128-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLEN128-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP9]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; VLEN128-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -409,24 +409,24 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] -; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] -; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; VLENUNK-NEXT: [[TMP8]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; VLENUNK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; VLENUNK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP6]] +; VLENUNK-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
i64, ptr [[TMP7]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; VLENUNK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP9]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; VLENUNK-NEXT: [[TMP10]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; VLENUNK-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VLENUNK: middle.block: -; VLENUNK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP8]]) +; VLENUNK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP10]]) ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VLENUNK: scalar.ph: @@ -473,7 +473,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP9]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; VLEN128-NEXT: [[TMP10]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] ; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; VLEN128-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP10]]) @@ -529,19 +529,19 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; VLENUNK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; VLENUNK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 8 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; VLENUNK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; 
VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -570,19 +570,19 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLEN128-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; VLEN128-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 8 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; VLEN128-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -627,19 +627,19 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 -; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; VLENUNK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; VLENUNK-NEXT: [[TMP8:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP7]], i32 0 +; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 8 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; VLENUNK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -668,19 +668,19 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { ; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLEN128-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 ; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 -; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; VLEN128-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i32 0 +; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 8 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; VLEN128-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll index aa7b7a5676eef..d1d7ae53116e1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -13,27 +13,27 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = 
shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP10]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -77,27 +77,27 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr 
[[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -139,34 +139,34 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP11]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 
[[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP9]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP12]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP10]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -180,7 +180,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] ; entry: @@ -209,25 +209,25 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; 
CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -265,26 +265,26 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) ; CHECK-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) 
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -357,27 +357,27 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1024) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP10]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: 
[[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll index 8388d018b89e8..46f45eb992eb0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ -54,7 +54,7 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { ; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) @@ -129,7 +129,7 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 ; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) @@ -200,7 +200,7 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) @@ -271,7 +271,7 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) @@ -342,7 +342,7 @@ define i32 
@select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) @@ -446,7 +446,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; SCALABLE-NEXT: [[TMP13:%.*]] = or [[VEC_PHI]], [[TMP12]] ; SCALABLE-NEXT: [[PREDPHI]] = select [[TMP8]], [[TMP13]], [[VEC_PHI]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] -; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[PREDPHI]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 6936887cd166c..b3259fce01f55 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -39,7 +39,7 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP16]], [[TMP15]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] @@ -111,7 +111,7 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP13]], [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -196,7 +196,7 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP20]], [[TMP16]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp uge i64 
[[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] @@ -264,7 +264,7 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP10:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; NOSTRIDED-NEXT: store [[TMP10]], ptr [[TMP9]], align 4 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NOSTRIDED: middle.block: ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -348,7 +348,7 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP10:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; NOSTRIDED-NEXT: store [[TMP10]], ptr [[TMP9]], align 4 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; NOSTRIDED: middle.block: ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -482,7 +482,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i32 0 ; NOSTRIDED-NEXT: store [[TMP14]], ptr [[TMP16]], align 4 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; NOSTRIDED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NOSTRIDED-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NOSTRIDED: middle.block: ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -559,7 +559,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP20]], [[TMP21]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]] ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; STRIDED-NEXT: [[TMP22:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; STRIDED: middle.block: ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -629,7 +629,7 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP10:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; NOSTRIDED-NEXT: store [[TMP10]], ptr [[TMP9]], align 4 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP11]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NOSTRIDED: middle.block: ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -782,7 +782,7 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] ; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP17]] ; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]] -; STRIDED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; STRIDED-NEXT: [[TMP31:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; STRIDED: middle.block: ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index 1db718a0e42f9..5023d0a1ec60b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -35,7 +35,7 @@ define void @test_pr98413_zext_removed(ptr %src, ptr noalias %dst, i64 %x) { ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 ; CHECK-NEXT: store [[TMP11]], ptr [[TMP13]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 97, [[N_VEC]] @@ -109,7 +109,7 @@ define void @test_pr98413_sext_removed(ptr %src, ptr noalias %dst, i64 %x) { ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 ; CHECK-NEXT: store [[TMP11]], ptr [[TMP13]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 97, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 0ecba2f304682..c85033c043d05 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -21,20 +21,20 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = load i64, 
ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP7:%.*]] = load i64, ptr [[B:%.*]], align 8 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP9]], align 8 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] @@ -96,12 +96,12 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -113,9 +113,9 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] -; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -194,20 +194,20 @@ define i64 @uniform_load_outside_use(ptr 
noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP7:%.*]] = load i64, ptr [[B:%.*]], align 8 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP9]], align 8 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] @@ -224,7 +224,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; SCALABLE: for.end: -; SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; SCALABLE-NEXT: ret i64 [[V_LCSSA]] ; ; FIXEDLEN-LABEL: @uniform_load_outside_use( @@ -324,16 +324,16 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 -; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; SCALABLE-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer -; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] -; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 -; SCALABLE-NEXT: 
[[TMP9:%.*]] = mul i64 1, [[TMP8]] -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 +; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; SCALABLE-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer +; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]] +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -341,17 +341,17 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], i32 8, [[TMP11]], poison) -; SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP11]], [[WIDE_MASKED_GATHER]], zeroinitializer -; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] -; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 -; SCALABLE-NEXT: store [[PREDPHI]], ptr [[TMP14]], align 8 -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] +; SCALABLE-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP13:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], i32 8, [[TMP13]], poison) +; SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP13]], [[WIDE_MASKED_GATHER]], zeroinitializer +; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP12]] +; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 +; SCALABLE-NEXT: store [[PREDPHI]], ptr [[TMP15]], align 8 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; SCALABLE-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -394,16 +394,16 @@ define void 
@conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP3]], <4 x i64> poison) ; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer ; FIXEDLEN-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP3]], <4 x i64> [[WIDE_MASKED_GATHER2]], <4 x i64> zeroinitializer -; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] -; FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 4 -; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 8 -; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI3]], ptr [[TMP9]], align 8 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 +; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP6]], align 8 +; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI3]], ptr [[TMP7]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], -; FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FIXEDLEN: middle.block: ; FIXEDLEN-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXEDLEN: scalar.ph: @@ -432,12 +432,12 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -459,13 +459,13 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP12]], zeroinitializer ; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], i32 8, [[TMP13]], poison) ; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = 
select [[TMP13]], [[WIDE_MASKED_GATHER]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] -; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP18]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]] +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP15]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TF-SCALABLE-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -504,13 +504,13 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer ; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison) ; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer -; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP7]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) +; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 +; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 -; TF-FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 +; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-FIXEDLEN: middle.block: ; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-FIXEDLEN: scalar.ph: @@ -568,20 +568,20 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; SCALABLE-NEXT: [[TMP4:%.*]] 
= call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B:%.*]], align 1 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP7:%.*]] = load i64, ptr [[B:%.*]], align 1 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP9]], align 8 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] @@ -643,12 +643,12 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -660,9 +660,9 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] -; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TF-SCALABLE-NEXT: [[TMP9:%.*]] 
= icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -741,20 +741,20 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 8 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] @@ -816,12 +816,12 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -833,9 +833,9 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr 
inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -914,31 +914,31 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 ; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP5:%.*]] = add zeroinitializer, [[TMP4]] -; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP7:%.*]] = add [[DOTSPLAT]], [[TMP6]] -; SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 -; SCALABLE-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 2 -; SCALABLE-NEXT: [[TMP12:%.*]] = sub i32 [[TMP11]], 1 -; SCALABLE-NEXT: [[TMP13:%.*]] = extractelement [[TMP7]], i32 [[TMP12]] -; SCALABLE-NEXT: store i64 [[TMP13]], ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP8]] -; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP15]], align 8 -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] -; SCALABLE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP7:%.*]] = add zeroinitializer, [[TMP6]] +; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[TMP9:%.*]] = add [[DOTSPLAT]], [[TMP8]] +; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 +; SCALABLE-NEXT: [[TMP12:%.*]] = 
call i32 @llvm.vscale.i32() +; SCALABLE-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], 2 +; SCALABLE-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1 +; SCALABLE-NEXT: [[TMP15:%.*]] = extractelement [[TMP9]], i32 [[TMP14]] +; SCALABLE-NEXT: store i64 [[TMP15]], ptr [[B:%.*]], align 8 +; SCALABLE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] +; SCALABLE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP17]], align 8 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] @@ -1006,12 +1006,12 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -1035,10 +1035,10 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT2]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP15]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; TF-SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -1143,16 +1143,16 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 -; SCALABLE-NEXT: 
[[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; SCALABLE-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer -; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] -; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 +; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; SCALABLE-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer +; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]] +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -1162,15 +1162,15 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP11]]) -; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] -; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP13]], align 8 -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; SCALABLE-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP13:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP13]]) +; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP12]] +; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP15]], align 8 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: 
[[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] @@ -1250,12 +1250,12 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -1279,12 +1279,12 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP12]], zeroinitializer ; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP13]]) ; TF-SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] -; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]] +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP15]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; TF-SCALABLE-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -1324,12 +1324,12 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer ; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]]) ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) +; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 +; TF-FIXEDLEN-NEXT: call 
void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 -; TF-FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 +; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-FIXEDLEN: middle.block: ; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-FIXEDLEN: scalar.ph: @@ -1385,20 +1385,20 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 8 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] @@ -1460,12 +1460,12 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 
+; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1477,9 +1477,9 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll index 8bde5ba5f1519..16a656e2e0a4e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -76,7 +76,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] -; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -126,8 +126,8 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = select [[TMP21]], [[WIDE_MASKED_LOAD]], zeroinitializer ; NO-VP-OUTLOOP-NEXT: [[TMP17]] = add [[TMP16]], [[VEC_PHI]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; NO-VP-OUTLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; NO-VP-OUTLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-OUTLOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: ; NO-VP-OUTLOOP-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP17]]) ; NO-VP-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -178,7 +178,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP10]]) ; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], 
[[VEC_PHI]] ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP-INLOOP: middle.block: ; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -289,7 +289,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] -; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -344,8 +344,8 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = xor [[TMP22]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select [[TMP17]], [[VEC_PHI]], [[TMP16]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; NO-VP-OUTLOOP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; NO-VP-OUTLOOP-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; NO-VP-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-OUTLOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PREDPHI]]) ; NO-VP-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -400,7 +400,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP10]]) ; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]] ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; NO-VP-INLOOP: middle.block: ; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -532,8 +532,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP23]] = add [[TMP22]], [[VEC_PHI]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; NO-VP-OUTLOOP-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; NO-VP-OUTLOOP-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; NO-VP-OUTLOOP-NEXT: [[TMP24:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-OUTLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: ; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP23]]) ; NO-VP-OUTLOOP-NEXT: 
[[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -596,7 +596,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[TMP17]], [[VEC_PHI]] ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP-INLOOP: middle.block: ; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -729,8 +729,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select [[TMP23]], [[VEC_PHI]], [[TMP22]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; NO-VP-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; NO-VP-OUTLOOP-NEXT: [[TMP24:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-OUTLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: ; NO-VP-OUTLOOP-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PREDPHI]]) ; NO-VP-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -797,7 +797,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[TMP17]], [[VEC_PHI]] ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP-INLOOP: middle.block: ; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll index 73dc3e4313a65..38cc05d0bebdb 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll @@ -44,7 +44,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -89,7 +89,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[WIDE_LOAD]]) ; NO-VP-NEXT: [[TMP10]] = add i32 [[TMP9]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: 
[[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -244,7 +244,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -289,7 +289,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[WIDE_LOAD]]) ; NO-VP-NEXT: [[TMP10]] = or i32 [[TMP9]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -361,7 +361,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -406,7 +406,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32( [[WIDE_LOAD]]) ; NO-VP-NEXT: [[TMP10]] = and i32 [[TMP9]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -478,7 +478,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -523,7 +523,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[WIDE_LOAD]]) ; NO-VP-NEXT: [[TMP10]] = xor i32 [[TMP9]], [[VEC_PHI]] ; NO-VP-NEXT: 
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -595,7 +595,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -641,7 +641,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[WIDE_LOAD]]) ; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -715,7 +715,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -761,7 +761,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32( [[WIDE_LOAD]]) ; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -835,7 +835,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -881,7 +881,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: 
[[TMP9:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32( [[WIDE_LOAD]]) ; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -955,7 +955,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1001,7 +1001,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[WIDE_LOAD]]) ; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -1075,7 +1075,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1120,7 +1120,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[WIDE_LOAD]]) ; NO-VP-NEXT: [[TMP10]] = fadd reassoc float [[TMP9]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -1276,7 +1276,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], 
!llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1323,7 +1323,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP9]], [[VEC_PHI]] ; NO-VP-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP9]], float [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -1398,7 +1398,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1445,7 +1445,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt float [[TMP9]], [[VEC_PHI]] ; NO-VP-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP9]], float [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -1689,7 +1689,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP20:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1740,7 +1740,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; NO-VP-NEXT: [[TMP12:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP11]]) ; NO-VP-NEXT: [[TMP13]] = fadd reassoc float [[TMP12]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -1825,7 +1825,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, 
zeroinitializer) ; NO-VP-NEXT: [[TMP10]] = or [[VEC_PHI]], [[TMP9]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP10]]) @@ -1911,7 +1911,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) ; NO-VP-NEXT: [[TMP10]] = or [[VEC_PHI]], [[TMP9]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP10]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll index 895c89b768acb..a0fcb8611d4ed 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll @@ -13,29 +13,29 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL-LABEL: @interleave( ; IF-EVL-NEXT: entry: -; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 -1, [[N:%.*]] -; IF-EVL-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP31]], 8 -; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP17]], [[TMP2]] +; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] +; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8 +; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: ; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 -; IF-EVL-NEXT: [[TMP32:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP32]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 -; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 -; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; IF-EVL-NEXT: [[TMP12:%.*]] = add [[TMP11]], zeroinitializer -; IF-EVL-NEXT: [[TMP13:%.*]] = mul [[TMP12]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] -; IF-EVL-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 -; IF-EVL-NEXT: [[TMP37:%.*]] = mul i64 1, [[TMP15]] -; IF-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP37]], i64 0 +; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 
@llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; IF-EVL-NEXT: [[TMP9:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; IF-EVL-NEXT: [[TMP10:%.*]] = add [[TMP9]], zeroinitializer +; IF-EVL-NEXT: [[TMP11:%.*]] = mul [[TMP10]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] +; IF-EVL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 +; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 1, [[TMP13]] +; IF-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP14]], i64 0 ; IF-EVL-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -44,36 +44,36 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] -; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 -; IF-EVL-NEXT: [[TMP38:%.*]] = add i64 [[TMP19]], 0 -; IF-EVL-NEXT: [[TMP39:%.*]] = mul i64 [[TMP38]], 1 -; IF-EVL-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], [[TMP39]] -; IF-EVL-NEXT: [[TMP23:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] -; IF-EVL-NEXT: [[TMP24:%.*]] = icmp ule [[STEP_ADD]], [[BROADCAST_SPLAT]] -; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i32], ptr [[B:%.*]], [[VEC_IND]], i32 0 -; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[STEP_ADD]], i32 0 -; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP25]], i32 4, [[TMP23]], poison) -; IF-EVL-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP26]], i32 4, [[TMP24]], poison) -; IF-EVL-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[VEC_IND]], i32 1 -; IF-EVL-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[STEP_ADD]], i32 1 -; IF-EVL-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP27]], i32 4, [[TMP23]], poison) -; IF-EVL-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP28]], i32 4, [[TMP24]], poison) -; IF-EVL-NEXT: [[TMP29:%.*]] = add nsw [[WIDE_MASKED_GATHER3]], [[WIDE_MASKED_GATHER]] -; IF-EVL-NEXT: [[TMP30:%.*]] = add nsw [[WIDE_MASKED_GATHER4]], [[WIDE_MASKED_GATHER2]] -; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] -; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] -; IF-EVL-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 -; IF-EVL-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 4 -; IF-EVL-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP35]] -; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP29]], ptr [[TMP33]], i32 4, [[TMP23]]) -; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP30]], ptr [[TMP36]], i32 4, [[TMP24]]) -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 
[[INDEX]], [[TMP10]] +; IF-EVL-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0 +; IF-EVL-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 +; IF-EVL-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 0 +; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 1 +; IF-EVL-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], [[TMP19]] +; IF-EVL-NEXT: [[TMP21:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] +; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ule [[STEP_ADD]], [[BROADCAST_SPLAT]] +; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i32], ptr [[B:%.*]], [[VEC_IND]], i32 0 +; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[STEP_ADD]], i32 0 +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP23]], i32 4, [[TMP21]], poison) +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP24]], i32 4, [[TMP22]], poison) +; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[VEC_IND]], i32 1 +; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[STEP_ADD]], i32 1 +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP25]], i32 4, [[TMP21]], poison) +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP26]], i32 4, [[TMP22]], poison) +; IF-EVL-NEXT: [[TMP27:%.*]] = add nsw [[WIDE_MASKED_GATHER3]], [[WIDE_MASKED_GATHER]] +; IF-EVL-NEXT: [[TMP28:%.*]] = add nsw [[WIDE_MASKED_GATHER4]], [[WIDE_MASKED_GATHER2]] +; IF-EVL-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP15]] +; IF-EVL-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP20]] +; IF-EVL-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 0 +; IF-EVL-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 4 +; IF-EVL-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[TMP33]] +; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP27]], ptr [[TMP31]], i32 4, [[TMP21]]) +; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP28]], ptr [[TMP34]], i32 4, [[TMP22]]) +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] -; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP35:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: @@ -82,10 +82,10 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[IV]], i32 0 -; IF-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[IV]], i32 1 -; IF-EVL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP21]] +; IF-EVL-NEXT: [[TMP37:%.*]] = load 
i32, ptr [[ARRAYIDX2]], align 4 +; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP36]] ; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 @@ -139,7 +139,7 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; NO-VP-NEXT: store [[TMP20]], ptr [[TMP24]], align 4 ; NO-VP-NEXT: store [[TMP21]], ptr [[TMP27]], align 4 ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP28:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll index fcea9e8d81ff6..425cc73dd17d3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll @@ -80,8 +80,8 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]] -; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-INLOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: store i32 [[TMP22]], ptr [[ADDR]], align 4 ; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -135,7 +135,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 4, !alias.scope [[META0:![0-9]+]] ; NO-VP-OUTLOOP-NEXT: [[TMP12]] = add [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP12]]) @@ -192,7 +192,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[WIDE_LOAD]]) ; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]] ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; 
NO-VP-INLOOP: middle.block: ; NO-VP-INLOOP-NEXT: store i32 [[TMP12]], ptr [[ADDR]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll index cb4cf3adfbaa0..15beb9874949a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll @@ -12,80 +12,80 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { ; IF-EVL-LABEL: @iv32( ; IF-EVL-NEXT: entry: -; IF-EVL-NEXT: [[TMP19:%.*]] = sub i32 -1, [[N:%.*]] +; IF-EVL-NEXT: [[TMP0:%.*]] = sub i32 -1, [[N:%.*]] ; IF-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 4 -; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP19]], [[TMP2]] -; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] +; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP0]], [[TMP2]] +; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: ; IF-EVL-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP8:%.*]] = sub i32 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP8]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP6]] ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP5]] ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 -; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 4 +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP11]], i32 4, i1 true) -; IF-EVL-NEXT: [[TMP13:%.*]] = add i32 [[EVL_BASED_IV]], 0 -; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[TMP13]] +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP9:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP9]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP11:%.*]] = add i32 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[TMP11]] +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP11]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 
0), poison, zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP13]] -; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP_LOAD]], ptr align 4 [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP12]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[TMP10]] -; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i32 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP_LOAD]], ptr align 4 [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP10]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP8]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp uge i32 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV1]] -; IF-EVL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV1]] -; IF-EVL-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX4]], align 4 -; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV1]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT1]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV]] +; IF-EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]] +; IF-EVL-NEXT: store i32 [[TMP17]], ptr [[ARRAYIDX4]], align 4 +; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL: for.cond.cleanup: ; IF-EVL-NEXT: ret void ; ; NO-VP-LABEL: @iv32( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP10:%.*]] = mul i32 [[TMP0]], 4 -; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], [[TMP10]] +; NO-VP-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4 +; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: -; NO-VP-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() -; 
NO-VP-NEXT: [[TMP11:%.*]] = mul i32 [[TMP1]], 4 -; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP11]] -; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP12:%.*]] = mul i32 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4 +; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] +; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; NO-VP-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; NO-VP-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NO-VP-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[TMP3]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 -; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]] -; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; NO-VP-NEXT: store [[WIDE_LOAD]], ptr [[TMP7]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] -; NO-VP-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; NO-VP-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 +; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[TMP6]] +; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP6]] +; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 +; NO-VP-NEXT: store [[WIDE_LOAD]], ptr [[TMP10]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i32 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -95,9 +95,9 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { ; NO-VP: for.body: ; NO-VP-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV]] -; NO-VP-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]] -; NO-VP-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX4]], align 4 +; NO-VP-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX4]], align 4 ; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] ; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll index 9d02ce715139e..5954a173fd3df 100644 --- 
a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll @@ -20,43 +20,43 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL: vector.ph: ; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 -; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true) -; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[EVL_BASED_IV]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IF-EVL-NEXT: [[TMP14:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; IF-EVL-NEXT: [[TMP15:%.*]] = add zeroinitializer, [[TMP14]] -; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP15]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT2]] -; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP13]] -; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP19:%.*]] = icmp ne [[VP_OP_LOAD]], zeroinitializer -; IF-EVL-NEXT: [[TMP20:%.*]] = select [[TMP16]], [[TMP19]], zeroinitializer -; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP13]] -; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP22]], [[TMP20]], i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP23:%.*]] = add [[VP_OP_LOAD]], [[VP_OP_LOAD3]] -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP23]], ptr align 4 [[TMP22]], [[TMP20]], i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP24:%.*]] = zext i32 [[TMP12]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 
[[TMP24]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] -; IF-EVL-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP12:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; IF-EVL-NEXT: [[TMP13:%.*]] = add zeroinitializer, [[TMP12]] +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP13]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT2]] +; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp ne [[VP_OP_LOAD]], zeroinitializer +; IF-EVL-NEXT: [[TMP18:%.*]] = select [[TMP14]], [[TMP17]], zeroinitializer +; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], [[TMP18]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP21:%.*]] = add [[VP_OP_LOAD]], [[VP_OP_LOAD3]] +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP21]], ptr align 4 [[TMP20]], [[TMP18]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP22:%.*]] = zext i32 [[TMP10]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP22]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] +; IF-EVL-NEXT: [[TMP23:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: @@ -65,13 +65,13 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL: for.body: ; IF-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_011]] -; IF-EVL-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP26]], 0 +; IF-EVL-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP24]], 0 ; IF-EVL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; IF-EVL: if.then: ; IF-EVL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_011]] -; IF-EVL-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP26]], [[TMP27]] +; IF-EVL-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP24]], [[TMP25]] ; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4 ; IF-EVL-NEXT: br label [[FOR_INC]] ; IF-EVL: for.inc: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll index 314d30f86ee57..6eb2923975165 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll @@ -43,7 
+43,7 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll index 2bbcd362ce16c..00c79dceab14b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll @@ -50,7 +50,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 ; NO-VP-NEXT: [[TMP10]] = add [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP10]]) @@ -214,7 +214,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 ; NO-VP-NEXT: [[TMP10]] = or [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP10]]) @@ -295,7 +295,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 ; NO-VP-NEXT: [[TMP10]] = and [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32( [[TMP10]]) @@ -376,7 +376,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 ; NO-VP-NEXT: [[TMP10]] = xor [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP10]]) @@ -460,7 +460,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_LOAD]], 
[[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select [[TMP9]], [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[TMP10]]) @@ -546,7 +546,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = icmp sgt [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select [[TMP9]], [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32( [[TMP10]]) @@ -632,7 +632,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = icmp ult [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select [[TMP9]], [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32( [[TMP10]]) @@ -718,7 +718,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP9:%.*]] = icmp ugt [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select [[TMP9]], [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP10]]) @@ -801,7 +801,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 ; NO-VP-NEXT: [[TMP10]] = fadd reassoc [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP10]]) @@ -968,7 +968,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select [[TMP9]], [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; 
NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32( [[TMP10]]) @@ -1054,7 +1054,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast ogt [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select [[TMP9]], [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32( [[TMP10]]) @@ -1308,7 +1308,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; NO-VP-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4 ; NO-VP-NEXT: [[TMP12]] = call reassoc @llvm.fmuladd.nxv4f32( [[WIDE_LOAD]], [[WIDE_LOAD1]], [[VEC_PHI]]) ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP13:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP14:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP12]]) @@ -1394,7 +1394,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) ; NO-VP-NEXT: [[TMP10]] = or [[VEC_PHI]], [[TMP9]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP10]]) @@ -1480,7 +1480,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) ; NO-VP-NEXT: [[TMP10]] = or [[VEC_PHI]], [[TMP9]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP10]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index d62f70c06a5fb..f9c1d7e138a00 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -16,46 +16,46 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL: vector.ph: ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; IF-EVL-NEXT: [[TMP4:%.*]] = 
sub i64 [[TMP1]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] ; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 -; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 +; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 1024, [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP7]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 4, i1 true) ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 0 -; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], -1 -; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP10]] -; IF-EVL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP13]] -; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP13]] -; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP14]] -; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP15]] -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP8]]) -; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP8]]) -; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP10]] -; IF-EVL-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 4 -; IF-EVL-NEXT: [[TMP21:%.*]] = mul i64 0, [[TMP20]] -; IF-EVL-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP20]] -; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP21]] -; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[TMP22]] -; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP8]]) -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP24]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP8]]) -; IF-EVL-NEXT: [[TMP25:%.*]] = zext i32 [[TMP8]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; IF-EVL-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 +; IF-EVL-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], -1 +; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP8]] +; IF-EVL-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 +; IF-EVL-NEXT: [[TMP12:%.*]] = mul i64 0, [[TMP11]] +; IF-EVL-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP11]] +; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP12]] +; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP13]] +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP8]] +; IF-EVL-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 +; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 0, [[TMP18]] +; IF-EVL-NEXT: [[TMP20:%.*]] = sub i64 1, [[TMP18]] +; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP19]] +; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP20]] +; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP22]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP6]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; IF-EVL-NEXT: [[TMP24:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: @@ -119,61 +119,61 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL: vector.ph: ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] ; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 -; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 +; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 1024, [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP7]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 4, i1 true) ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 0 +; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 ; IF-EVL-NEXT: [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32 -; IF-EVL-NEXT: [[TMP10:%.*]] = add i32 [[OFFSET_IDX3]], 0 +; IF-EVL-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX3]], 0 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[EVL_BASED_IV]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; IF-EVL-NEXT: [[TMP12:%.*]] = add zeroinitializer, [[TMP11]] -; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP12]] -; IF-EVL-NEXT: [[TMP13:%.*]] = icmp ule [[VEC_IV]], shufflevector ( insertelement ( poison, i64 1023, i64 0), poison, zeroinitializer) -; IF-EVL-NEXT: [[TMP14:%.*]] = add i64 [[TMP9]], -1 -; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP10]] -; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP8]]) -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp slt [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) -; IF-EVL-NEXT: [[TMP18:%.*]] = select [[TMP13]], [[TMP17]], zeroinitializer -; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP14]] -; IF-EVL-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP21]] -; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP21]] -; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP22]] -; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]] -; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP18]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP8]]) -; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP25]], [[VP_REVERSE_MASK]], i32 [[TMP8]]) -; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD4]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP8]]) -; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP14]] -; IF-EVL-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 4 -; IF-EVL-NEXT: [[TMP29:%.*]] = mul i64 0, [[TMP28]] -; IF-EVL-NEXT: [[TMP30:%.*]] = sub i64 1, [[TMP28]] -; IF-EVL-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[TMP26]], i64 [[TMP29]] -; IF-EVL-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP31]], i64 [[TMP30]] -; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, 
zeroinitializer), i32 [[TMP8]]) -; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP18]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP8]]) -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP32]], [[VP_REVERSE_MASK6]], i32 [[TMP8]]) -; IF-EVL-NEXT: [[TMP33:%.*]] = zext i32 [[TMP8]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP33]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; IF-EVL-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-NEXT: [[TMP9:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; IF-EVL-NEXT: [[TMP10:%.*]] = add zeroinitializer, [[TMP9]] +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP10]] +; IF-EVL-NEXT: [[TMP11:%.*]] = icmp ule [[VEC_IV]], shufflevector ( insertelement ( poison, i64 1023, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[TMP7]], -1 +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP8]] +; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp slt [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[TMP16:%.*]] = select [[TMP11]], [[TMP15]], zeroinitializer +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP12]] +; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 +; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 0, [[TMP19]] +; IF-EVL-NEXT: [[TMP21:%.*]] = sub i64 1, [[TMP19]] +; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP20]] +; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP21]] +; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP23]], [[VP_REVERSE_MASK]], i32 [[TMP6]]) +; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD4]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP12]] +; IF-EVL-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4 +; IF-EVL-NEXT: [[TMP27:%.*]] = mul i64 0, [[TMP26]] +; IF-EVL-NEXT: [[TMP28:%.*]] = sub i64 1, [[TMP26]] +; IF-EVL-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP27]] +; IF-EVL-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP29]], i64 [[TMP28]] +; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: call void 
@llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP30]], [[VP_REVERSE_MASK6]], i32 [[TMP6]]) +; IF-EVL-NEXT: [[TMP31:%.*]] = zext i32 [[TMP6]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP31]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; IF-EVL-NEXT: [[TMP32:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll index 2dd47d5c1ea8a..fc63b0b4999e0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll @@ -42,7 +42,7 @@ define void @test(ptr %p) { ; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -362,7 +362,7 @@ define void @trivial_due_max_vscale(ptr %p) { ; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll index 362bfd61ebd07..10d7bc4b3d1d9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll @@ -20,34 +20,34 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL: vector.ph: ; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi 
i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true) -; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 -; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP13]] +; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP13]] -; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP18:%.*]] = add nsw [[VP_OP_LOAD1]], [[VP_OP_LOAD]] -; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP13]] -; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP18]], ptr align 4 [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP16:%.*]] = add nsw [[VP_OP_LOAD1]], [[VP_OP_LOAD]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP16]], ptr align 4 [[TMP18]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP10]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] +; IF-EVL-NEXT: [[TMP20:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, 
label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: @@ -56,10 +56,10 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +; IF-EVL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP21]] ; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 @@ -96,7 +96,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 ; NO-VP-NEXT: store [[TMP11]], ptr [[TMP13]], align 4 ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll index 08f2b823815b1..051cc37888b85 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll @@ -57,7 +57,7 @@ define void @foo() { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI5]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] @@ -113,6 +113,7 @@ define void @foo() { ; NO_SCALABLE_VECS-NEXT: br i1 [[OUTER_EXITCOND]], label [[EXIT:%.*]], label [[OUTER_LOOP]], !llvm.loop [[LOOP0:![0-9]+]] ; NO_SCALABLE_VECS: exit: ; NO_SCALABLE_VECS-NEXT: ret void +; entry: br label %outer_loop diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index 030eb9e76b51a..bfe6a5aba2370 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -47,8 +47,8 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK-NEXT: store [[TMP13]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 
[[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -58,8 +58,8 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP18]], [[I_08]] +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP17]], [[I_08]] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_08]] ; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 @@ -126,8 +126,8 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no ; CHECK-NEXT: store [[TMP12]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -137,8 +137,8 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP17]], [[I_08]] +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], [[I_08]] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_08]] ; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 @@ -205,8 +205,8 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label 
[[SCALAR_PH]] @@ -287,8 +287,8 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll index b14d5a9f4bb57..d2c7bafe5a46e 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll @@ -32,7 +32,7 @@ define void @test(ptr %d) { ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer), ptr [[TMP8]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4096, ptr [[ARR]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, [[N_VEC]] @@ -104,7 +104,7 @@ define void @testloopvariant(ptr %d) { ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer), ptr [[TMP8]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4096, ptr [[ARR]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll index 965428098342f..c9b3ea3efa4c5 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll @@ -23,8 +23,8 @@ ; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index ; CHECKUF1: store %[[FADD]], ptr %[[IDXA]], align 8 ; CHECKUF1: %index.next = add nuw i64 %index, %[[VSCALEX4]] -; CHECKUF1: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec -; CHECKUF1: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !0 +; CHECKUF1: %[[CMP:.*]] = icmp ult i64 %index.next, %n.vec +; CHECKUF1: br i1 %[[CMP]], label %vector.body, label %middle.block, !llvm.loop !0 ; For an interleave factor of 2, vscale is scaled by 8 instead of 4 (and thus shifted left by 3 instead of 2). 
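; NOTE (illustrative sketch, not part of this patch or of the generated checks above):
; the arithmetic in the preceding comment can be written as stand-alone IR. With a
; vectorization factor of vscale x 4 and an interleave factor of 2, each vector
; iteration advances the induction variable by vscale * 8, i.e. vscale shifted left
; by 3, which is what the CHECKUF2 step (%[[VSCALEX8]]) corresponds to. Function and
; value names below are hypothetical.
declare i64 @llvm.vscale.i64()

define i64 @interleaved_step_example() {
  %vscale = call i64 @llvm.vscale.i64()   ; runtime vscale
  %step = shl i64 %vscale, 3              ; 8 * vscale = 4 lanes x interleave factor 2
  ret i64 %step
}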
@@ -61,8 +61,8 @@ ; CHECKUF2: store %[[FADD]], ptr %[[IDXA]], align 8 ; CHECKUF2: store %[[FADD_NEXT]], ptr %[[IDXA_NEXT]], align 8 ; CHECKUF2: %index.next = add nuw i64 %index, %[[VSCALEX8]] -; CHECKUF2: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec -; CHECKUF2: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !0 +; CHECKUF2: %[[CMP:.*]] = icmp ult i64 %index.next, %n.vec +; CHECKUF2: br i1 %[[CMP]], label %vector.body, label %middle.block, !llvm.loop !0 define void @loop(i32 %N, ptr nocapture %a, ptr nocapture readonly %b) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll index 7d9f0c1c6d258..f1f6a8c09d9e8 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll @@ -5,35 +5,77 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_add_trunc( -; CHECK: call i32 @llvm.vscale.i32() -; CHECK: call i32 @llvm.vscale.i32() -; CHECK: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 16 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 16 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 256, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 16 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 256, [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 256, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 16 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[TMP36:%.*]], %vector.body ] -; CHECK: [[TMP14:%.*]] = and [[VEC_PHI]], shufflevector ( insertelement ( poison, i32 255, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP15:%.*]] = and [[VEC_PHI1]], shufflevector ( insertelement ( poison, i32 255, i64 0), poison, zeroinitializer) -; CHECK: [[WIDE_LOAD:%.*]] = load , ptr -; CHECK: [[WIDE_LOAD2:%.*]] = load , ptr -; CHECK-NEXT: [[TMP26:%.*]] = zext [[WIDE_LOAD]] to -; CHECK-NEXT: [[TMP27:%.*]] = zext [[WIDE_LOAD2]] to -; CHECK-NEXT: [[TMP28:%.*]] = add [[TMP14]], [[TMP26]] -; CHECK-NEXT: [[TMP29:%.*]] = add [[TMP15]], [[TMP27]] -; CHECK-NEXT: [[TMP33:%.*]] = trunc [[TMP28]] to -; CHECK-NEXT: [[TMP35:%.*]] = trunc [[TMP29]] to -; CHECK-NEXT: [[TMP34]] = zext [[TMP33]] to -; CHECK-NEXT: [[TMP36]] = zext [[TMP35]] to -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]] -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}} +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i32 255, i32 0), [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[TMP6:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 8
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = and [[VEC_PHI]], shufflevector ( insertelement ( poison, i32 255, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP13:%.*]] = and [[VEC_PHI1]], shufflevector ( insertelement ( poison, i32 255, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[TMP6]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 8
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 [[TMP18]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP16]], align 4
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP19]], align 4
+; CHECK-NEXT: [[TMP20:%.*]] = zext [[WIDE_LOAD]] to
+; CHECK-NEXT: [[TMP21:%.*]] = zext [[WIDE_LOAD2]] to
+; CHECK-NEXT: [[TMP22:%.*]] = add [[TMP12]], [[TMP20]]
+; CHECK-NEXT: [[TMP23:%.*]] = add [[TMP13]], [[TMP21]]
+; CHECK-NEXT: [[TMP24:%.*]] = trunc [[TMP22]] to
+; CHECK-NEXT: [[TMP25:%.*]] = trunc [[TMP23]] to
+; CHECK-NEXT: [[TMP26]] = zext [[TMP24]] to
+; CHECK-NEXT: [[TMP27]] = zext [[TMP25]] to
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP28:%.*]] = icmp uge i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP37:%.*]] = trunc [[TMP34]] to
-; CHECK-NEXT: [[TMP38:%.*]] = trunc [[TMP36]] to
-; CHECK-NEXT: [[BIN_RDX:%.*]] = add [[TMP38]], [[TMP37]]
-; CHECK-NEXT: [[TMP39:%.*]] = call i8 @llvm.vector.reduce.add.nxv8i8( [[BIN_RDX]])
-; CHECK-NEXT: [[TMP40:%.*]] = zext i8 [[TMP39]] to i32
-; CHECK-NEXT: %cmp.n = icmp eq i32 256, %n.vec
+; CHECK-NEXT: [[TMP29:%.*]] = trunc [[TMP26]] to
+; CHECK-NEXT: [[TMP30:%.*]] = trunc [[TMP27]] to
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add [[TMP30]], [[TMP29]]
+; CHECK-NEXT: [[TMP31:%.*]] = call i8 @llvm.vector.reduce.add.nxv8i8( [[BIN_RDX]])
+; CHECK-NEXT: [[TMP32:%.*]] = zext i8 [[TMP31]] to i32
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP32]], [[MIDDLE_BLOCK]] ], [ 255, [[ENTRY]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255
+; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]]
+; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4
+; CHECK-NEXT: [[L3E:%.*]] = zext i8 [[L3]] to i32
+; CHECK-NEXT: [[L9]] = add i32 [[SUM_02]], [[L3E]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 256
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L9]], [[LOOP]] ], [ [[TMP32]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[SUM_0_LCSSA]] to i8
+; CHECK-NEXT: ret i8 [[RET]]
 ;
 entry:
   br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
index 18d2323ed6f5b..90f92a233aa02 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
@@ -13,18 +13,18 @@ define void @trunc_minimal_bitwidth(ptr %bptr, ptr noalias %hptr, i32 %val, i64
 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = trunc [[BROADCAST_SPLAT]] to
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[HPTR:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: store [[TMP4]], ptr [[TMP5]], align 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP6:%.*]] = trunc [[BROADCAST_SPLAT]] to
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[HPTR:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: store [[TMP6]], ptr [[TMP7]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -73,21 +73,21 @@ define void @trunc_minimal_bitwidths_shufflevector (ptr %p, i32 %arg1, i64 %len)
 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[LEN]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[LEN]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[ARG1:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = trunc [[BROADCAST_SPLAT]] to
+; CHECK-NEXT: [[TMP6:%.*]] = trunc [[BROADCAST_SPLAT]] to
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP6:%.*]] = xor [[WIDE_LOAD]], [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], [[WIDE_LOAD]]
-; CHECK-NEXT: store [[TMP7]], ptr [[TMP5]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = xor [[WIDE_LOAD]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = mul [[TMP8]], [[WIDE_LOAD]]
+; CHECK-NEXT: store [[TMP9]], ptr [[TMP7]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[LEN]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll
index fe98ca167a089..f3e64359ce8a6 100644
--- a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll
+++ b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll
@@ -37,33 +37,33 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; NO-VP-LABEL: @foo(
 ; NO-VP-NEXT: entry:
 ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP0]], 4
-; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP8]]
+; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; NO-VP: vector.ph:
-; NO-VP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP14:%.*]] = mul i64 [[TMP1]], 4
-; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP14]]
-; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP15:%.*]] = mul i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
 ; NO-VP: vector.body:
 ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; NO-VP-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]]
-; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
-; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4
-; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP3]]
-; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
-; NO-VP-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP7]], align 4
-; NO-VP-NEXT: [[TMP16:%.*]] = add nsw [[WIDE_LOAD1]], [[WIDE_LOAD]]
-; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]]
+; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP6]]
+; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4
+; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP6]]
 ; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
-; NO-VP-NEXT: store [[TMP16]], ptr [[TMP10]], align 4
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]]
-; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; NO-VP-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP10]], align 4
+; NO-VP-NEXT: [[TMP11:%.*]] = add nsw [[WIDE_LOAD1]], [[WIDE_LOAD]]
+; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
+; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
+; NO-VP-NEXT: store [[TMP11]], ptr [[TMP13]], align 4
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[TMP14:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
+; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; NO-VP: middle.block:
 ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -73,10 +73,10 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; NO-VP: for.body:
 ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; NO-VP-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; NO-VP-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; NO-VP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; NO-VP-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
-; NO-VP-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+; NO-VP-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; NO-VP-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
 ; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
 ; NO-VP-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
 ; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
@@ -110,7 +110,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; NO-VP-DEF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
 ; NO-VP-DEF-NEXT: store [[TMP8]], ptr [[TMP10]], align 4
 ; NO-VP-DEF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
-; NO-VP-DEF-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NO-VP-DEF-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NO-VP-DEF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; NO-VP-DEF: middle.block:
 ; NO-VP-DEF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]