diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h index f7a893708758c..8577fc72ecd0f 100644 --- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h +++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h @@ -99,6 +99,9 @@ void fillMapFromAssume(AssumeInst &Assume, RetainedKnowledgeMap &Result); struct RetainedKnowledge { Attribute::AttrKind AttrKind = Attribute::None; uint64_t ArgValue = 0; Value *WasOn = nullptr; + /// The bundle's argument as an IR value. Declared after WasOn so positional + /// aggregate initialization of the preceding members is unaffected. + Value *IRArgValue = nullptr; bool operator==(RetainedKnowledge Other) const { return AttrKind == Other.AttrKind && WasOn == Other.WasOn && diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp index c27bfa6f3cc2c..7366fabca3eeb 100644 --- a/llvm/lib/Analysis/AssumeBundleQueries.cpp +++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp @@ -114,6 +114,9 @@ llvm::getKnowledgeFromBundle(AssumeInst &Assume, }; if (BOI.End - BOI.Begin > ABA_Argument) Result.ArgValue = GetArgOr1(0); + // Only read the argument operand when the bundle actually carries one. + if (BOI.End - BOI.Begin > ABA_Argument) + Result.IRArgValue = getValueFromBundleOpInfo(Assume, BOI, ABA_Argument); if (Result.AttrKind == Attribute::Alignment) if (BOI.End - BOI.Begin > ABA_Argument + 1) Result.ArgValue = MinAlign(Result.ArgValue, GetArgOr1(1)); diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index b461c41d29e84..2b5c7e19e8d62 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -31,6 +31,40 @@ static bool isAligned(const Value *Base, Align Alignment, return Base->getPointerAlignment(DL) >= Alignment; } +/// Look through assumes valid at \p CtxI to see if both dereferenceability (as +/// decided by \p CheckSize) and \p Alignment of \p Ptr can be proven. +static bool isDereferenceableAndAlignedPointerViaAssumption( + const Value *Ptr, Align Alignment, + function_ref<bool(const RetainedKnowledge &RK)> CheckSize, + const DataLayout &DL, const Instruction *CtxI, AssumptionCache *AC, + const DominatorTree *DT) { + // Dereferenceable information from assumptions is only valid if the value + // cannot be freed between the assumption and use. For now just use the + // information for values that cannot be freed in the function. 
+ // TODO: More precisely check if the pointer can be freed between assumption + // and use. + if (!CtxI || Ptr->canBeFreed()) + return false; + RetainedKnowledge AlignRK; + RetainedKnowledge DerefRK; + bool IsAligned = Ptr->getPointerAlignment(DL) >= Alignment; + return getKnowledgeForValue( + Ptr, {Attribute::Dereferenceable, Attribute::Alignment}, AC, + [&](RetainedKnowledge RK, Instruction *Assume, auto) { + if (!isValidAssumeForContext(Assume, CtxI, DT)) + return false; + if (RK.AttrKind == Attribute::Alignment) + AlignRK = std::max(AlignRK, RK); + if (RK.AttrKind == Attribute::Dereferenceable) + DerefRK = std::max(DerefRK, RK); + IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value(); + if (IsAligned && DerefRK && CheckSize(DerefRK)) + return true; // We have found what we needed so we stop looking + return false; // Other assumes may have better information, so + // keep looking + }); +} + /// Test if V is always a pointer to allocated and suitably aligned memory for /// a simple load or store. static bool isDereferenceableAndAlignedPointer( @@ -169,38 +203,12 @@ static bool isDereferenceableAndAlignedPointer( Size, DL, CtxI, AC, DT, TLI, Visited, MaxDepth); - // Dereferenceable information from assumptions is only valid if the value - // cannot be freed between the assumption and use. For now just use the - // information for values that cannot be freed in the function. - // TODO: More precisely check if the pointer can be freed between assumption - // and use. - if (CtxI && !V->canBeFreed()) { - /// Look through assumes to see if both dereferencability and alignment can - /// be proven by an assume if needed. 
- RetainedKnowledge AlignRK; - RetainedKnowledge DerefRK; - bool IsAligned = V->getPointerAlignment(DL) >= Alignment; - if (getKnowledgeForValue( - V, {Attribute::Dereferenceable, Attribute::Alignment}, AC, - [&](RetainedKnowledge RK, Instruction *Assume, auto) { - if (!isValidAssumeForContext(Assume, CtxI, DT)) - return false; - if (RK.AttrKind == Attribute::Alignment) - AlignRK = std::max(AlignRK, RK); - if (RK.AttrKind == Attribute::Dereferenceable) - DerefRK = std::max(DerefRK, RK); - IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value(); - if (IsAligned && DerefRK && - DerefRK.ArgValue >= Size.getZExtValue()) - return true; // We have found what we needed so we stop looking - return false; // Other assumes may have better information. so - // keep looking - })) - return true; - } - - // If we don't know, assume the worst. - return false; + return isDereferenceableAndAlignedPointerViaAssumption( + V, Alignment, + [Size](const RetainedKnowledge &RK) { + return RK.ArgValue >= Size.getZExtValue(); + }, + DL, CtxI, AC, DT); } bool llvm::isDereferenceableAndAlignedPointer( @@ -317,8 +325,8 @@ bool llvm::isDereferenceableAndAlignedInLoop( return false; const SCEV *MaxBECount = - Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates) - : SE.getConstantMaxBackedgeTakenCount(L); + Predicates ? 
SE.getPredicatedSymbolicMaxBackedgeTakenCount(L, *Predicates) + : SE.getSymbolicMaxBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(MaxBECount)) return false; @@ -334,9 +342,11 @@ bool llvm::isDereferenceableAndAlignedInLoop( Value *Base = nullptr; APInt AccessSize; + const SCEV *AccessSizeSCEV = nullptr; if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) { Base = NewBase->getValue(); AccessSize = MaxPtrDiff; + AccessSizeSCEV = PtrDiff; } else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) { if (MinAdd->getNumOperands() != 2) return false; @@ -360,12 +370,22 @@ bool llvm::isDereferenceableAndAlignedInLoop( return false; AccessSize = MaxPtrDiff + Offset->getAPInt(); + AccessSizeSCEV = SE.getAddExpr(PtrDiff, Offset); Base = NewBase->getValue(); } else return false; Instruction *HeaderFirstNonPHI = &*L->getHeader()->getFirstNonPHIIt(); - return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL, + return isDereferenceableAndAlignedPointerViaAssumption( + Base, Alignment, + [&SE, AccessSizeSCEV](const RetainedKnowledge &RK) { + // IRArgValue defaults to nullptr; never hand a null value to SCEV. + return RK.IRArgValue && + SE.isKnownPredicate(CmpInst::ICMP_ULE, AccessSizeSCEV, + SE.getSCEV(RK.IRArgValue)); + }, + DL, HeaderFirstNonPHI, AC, &DT) || + isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL, HeaderFirstNonPHI, AC, &DT); } diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll index d1cbe02192e31..344f4c5bb0d79 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll @@ -185,15 +185,32 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: 
[[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 @@ -268,15 +285,32 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP9:%.*]] = 
phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0