From fc269c14e24b6a9731ce354fb1f1e682cb78d53e Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Wed, 28 May 2025 11:05:47 -0500 Subject: [PATCH 1/9] Added attributor for identifying `!invariant.load`s. --- llvm/include/llvm/Transforms/IPO/Attributor.h | 38 +++ llvm/lib/Transforms/IPO/Attributor.cpp | 2 + .../Transforms/IPO/AttributorAttributes.cpp | 245 ++++++++++++++++++ .../multiple-offsets-pointer-info.ll | 8 +- .../Attributor/tag-invariant-loads.ll | 220 ++++++++++++++++ 5 files changed, 509 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/Attributor/tag-invariant-loads.ll diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index c628bbb007230..53fa7a04dc5b5 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6289,6 +6289,44 @@ struct AAUnderlyingObjects : AbstractAttribute { AA::ValueScope Scope = AA::Interprocedural) const = 0; }; +/// An abstract interface for identifying pointers from which loads can be +/// marked invariant. +struct AAInvariantLoadPointer : public AbstractAttribute { + AAInvariantLoadPointer(const IRPosition &IRP) : AbstractAttribute(IRP) {} + + /// See AbstractAttribute::isValidIRPositionForInit + static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) { + if (!IRP.getAssociatedType()->isPointerTy()) + return false; + return AbstractAttribute::isValidIRPositionForInit(A, IRP); + } + + /// Create an abstract attribute view for the position \p IRP. + static AAInvariantLoadPointer &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Return true if the pointer's contents are known to remain invariant. + virtual bool isKnownInvariant() const = 0; + + /// Return true if the pointer's contents are assumed to remain invariant. + virtual bool isAssumedInvariant() const = 0; + + /// See AbstractAttribute::getName(). 
+ StringRef getName() const override { return "AAInvariantLoadPointer"; } + + /// See AbstractAttribute::getIdAddr(). + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAInvariantLoadPointer + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + + /// Unique ID (due to the unique address). + static const char ID; +}; + /// An abstract interface for address space information. struct AAAddressSpace : public StateWrapper { AAAddressSpace(const IRPosition &IRP, Attributor &A) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index cbdbf9ae1494d..1dc576656d12a 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -3620,6 +3620,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { if (SimplifyAllLoads) getAssumedSimplified(IRPosition::value(I), nullptr, UsedAssumedInformation, AA::Intraprocedural); + getOrCreateAAFor( + IRPosition::value(*LI->getPointerOperand())); getOrCreateAAFor( IRPosition::value(*LI->getPointerOperand())); } else { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 470c5308edca4..f0647747d6c7f 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -191,6 +191,7 @@ PIPE_OPERATOR(AAInterFnReachability) PIPE_OPERATOR(AAPointerInfo) PIPE_OPERATOR(AAAssumptionInfo) PIPE_OPERATOR(AAUnderlyingObjects) +PIPE_OPERATOR(AAInvariantLoadPointer) PIPE_OPERATOR(AAAddressSpace) PIPE_OPERATOR(AAAllocationInfo) PIPE_OPERATOR(AAIndirectCallInfo) @@ -12534,6 +12535,248 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo { }; } // namespace +/// --------------------- Invariant Load Pointer ------------------------------- +namespace { + +struct AAInvariantLoadPointerImpl + : public StateWrapper, 
AAInvariantLoadPointer, + uint8_t> { + // load invariance is implied by, but not equivalent to IS_NOALIAS | + // IS_READONLY, as load invariance is also implied by all underlying objects + // being load invariant. + // + // IS_INVARIANT is set to indicate that the contents of the pointer are + // *known* to be invariant. + enum { + IS_INVARIANT = 1 << 0, + IS_NOALIAS = 1 << 1, + IS_READONLY = 1 << 2, + }; + static_assert(getBestState() == (IS_INVARIANT | IS_NOALIAS | IS_READONLY), + "Unexpected best state!"); + + using Base = StateWrapper, AAInvariantLoadPointer, + uint8_t>; + + // the BitIntegerState is optimistic about noalias and readonly, but + // pessimistic about invariance + AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A) + : Base(IRP, IS_NOALIAS | IS_READONLY) {} + + void initialize(Attributor &A) final { + // conservatively assume that the pointer's contents are not invariant, + // until proven otherwise. + removeAssumedBits(IS_INVARIANT); + } + + bool isKnownInvariant() const final { + return isKnown(IS_INVARIANT) || isKnown(IS_NOALIAS | IS_READONLY); + } + + bool isAssumedInvariant() const final { + return isAssumed(IS_INVARIANT) || isAssumed(IS_NOALIAS | IS_READONLY); + } + + ChangeStatus updateImpl(Attributor &A) override { + if (isKnownInvariant()) + return ChangeStatus::UNCHANGED; + + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + Changed |= updateNoAlias(A); + Changed |= updateReadOnly(A); + + bool UsedAssumedInformation = false; + const auto IsInvariantLoadIfPointer = [&](const Value &V) { + if (!V.getType()->isPointerTy()) + return true; + const auto *IsInvariantLoadPointer = + A.getOrCreateAAFor(IRPosition::value(V), this, + DepClassTy::REQUIRED); + if (IsInvariantLoadPointer->isKnownInvariant()) + return true; + if (!IsInvariantLoadPointer->isAssumedInvariant()) + return false; + + UsedAssumedInformation = true; + return true; + }; + + const auto *AUO = A.getOrCreateAAFor( + getIRPosition(), this, DepClassTy::REQUIRED); + + 
if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer)) { + removeAssumedBits(IS_INVARIANT); + return ChangeStatus::CHANGED; + } + + if (!UsedAssumedInformation) { + // pointer is known (not assumed) to be invariant + addKnownBits(IS_INVARIANT); + return ChangeStatus::CHANGED; + } + + return Changed; + } + + ChangeStatus manifest(Attributor &A) override { + if (!isKnownInvariant()) + return ChangeStatus::UNCHANGED; + + ChangeStatus Changed = ChangeStatus::UNCHANGED; + Value *Ptr = &getAssociatedValue(); + const auto TagInvariantLoads = [&](const Use &U, bool &) { + if (U.get() != Ptr) + return true; + auto *I = dyn_cast(U.getUser()); + if (!I) + return true; + + // Ensure that we are only changing uses from the corresponding callgraph + // SCC in the case that the AA isn't run on the entire module + if (!A.isRunOn(I->getFunction())) + return true; + + if (I->hasMetadata(LLVMContext::MD_invariant_load)) + return true; + + if (auto *LI = dyn_cast(I)) { + if (LI->isVolatile() || LI->isAtomic()) + return true; + + LI->setMetadata(LLVMContext::MD_invariant_load, + MDNode::get(LI->getContext(), {})); + Changed = ChangeStatus::CHANGED; + } + return true; + }; + + (void)A.checkForAllUses(TagInvariantLoads, *this, *Ptr); + return Changed; + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr(Attributor *) const override { + std::string Str; + raw_string_ostream OS(Str); + OS << "load invariant pointer: " << isKnown() << '\n'; + return Str; + } + + /// See AbstractAttribute::trackStatistics(). 
+ void trackStatistics() const override {} + +protected: + ChangeStatus updateNoAlias(Attributor &A) { + if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS)) + return ChangeStatus::UNCHANGED; + + const auto *ANoAlias = A.getOrCreateAAFor(getIRPosition(), this, + DepClassTy::REQUIRED); + if (!ANoAlias) + return tryInferNoAlias(A); + + if (!ANoAlias->isAssumedNoAlias()) { + removeAssumedBits(IS_NOALIAS); + return ChangeStatus::CHANGED; + } + if (ANoAlias->isKnownNoAlias()) + addKnownBits(IS_NOALIAS); + + return ChangeStatus::UNCHANGED; + } + + /// Fallback method if updateNoAlias fails to infer noalias information from + /// AANoAlias. + virtual ChangeStatus tryInferNoAlias(Attributor &A) { + return ChangeStatus::UNCHANGED; + } + + ChangeStatus updateReadOnly(Attributor &A) { + if (isKnown(IS_READONLY) || !isAssumed(IS_READONLY)) + return ChangeStatus::UNCHANGED; + + // AAMemoryBehavior may crash if value is global + if (!getAssociatedFunction()) + return tryInferReadOnly(A); + + const auto *AMemoryBehavior = A.getOrCreateAAFor( + getIRPosition(), this, DepClassTy::REQUIRED); + if (!AMemoryBehavior) + return tryInferReadOnly(A); + + if (!AMemoryBehavior->isAssumedReadOnly()) { + removeAssumedBits(IS_READONLY); + return ChangeStatus::CHANGED; + } + if (AMemoryBehavior->isKnownReadOnly()) + addKnownBits(IS_READONLY); + + return ChangeStatus::UNCHANGED; + } + + /// Fallback method if updateReadOnly fails to infer readonly information from + /// AAMemoryBehavior. 
+ virtual ChangeStatus tryInferReadOnly(Attributor &A) { + return ChangeStatus::UNCHANGED; + } +}; + +struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl { + AAInvariantLoadPointerFloating(const IRPosition &IRP, Attributor &A) + : AAInvariantLoadPointerImpl(IRP, A) {} +}; + +struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl { + AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A) + : AAInvariantLoadPointerImpl(IRP, A) {} +}; + +struct AAInvariantLoadPointerCallSiteReturned final + : AAInvariantLoadPointerImpl { + AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AAInvariantLoadPointerImpl(IRP, A) {} +}; + +struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl { + AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A) + : AAInvariantLoadPointerImpl(IRP, A) {} + +protected: + ChangeStatus tryInferNoAlias(Attributor &A) override { + const auto *Arg = getAssociatedArgument(); + if (Arg->hasNoAliasAttr()) { + addKnownBits(IS_NOALIAS); + return ChangeStatus::UNCHANGED; + } + + // noalias information is not provided, and cannot be inferred from + // AANoAlias + removeAssumedBits(IS_NOALIAS); + return ChangeStatus::CHANGED; + } + + ChangeStatus tryInferReadOnly(Attributor &A) override { + const auto *Arg = getAssociatedArgument(); + if (Arg->onlyReadsMemory()) { + addKnownBits(IS_READONLY); + return ChangeStatus::UNCHANGED; + } + + // readonly information is not provided, and cannot be inferred from + // AAMemoryBehavior + removeAssumedBits(IS_READONLY); + return ChangeStatus::CHANGED; + } +}; + +struct AAInvariantLoadPointerCallSiteArgument final + : AAInvariantLoadPointerImpl { + AAInvariantLoadPointerCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAInvariantLoadPointerImpl(IRP, A) {} +}; +} // namespace + /// ------------------------ Address Space ------------------------------------ namespace { @@ -13031,6 +13274,7 @@ const char 
AAInterFnReachability::ID = 0; const char AAPointerInfo::ID = 0; const char AAAssumptionInfo::ID = 0; const char AAUnderlyingObjects::ID = 0; +const char AAInvariantLoadPointer::ID = 0; const char AAAddressSpace::ID = 0; const char AAAllocationInfo::ID = 0; const char AAIndirectCallInfo::ID = 0; @@ -13165,6 +13409,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInvariantLoadPointer) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo) diff --git a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll index f04ac4d73340f..9e58a35107491 100644 --- a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll @@ -10,7 +10,7 @@ define i8 @select_offsets_simplifiable_1(i1 %cnd1, i1 %cnd2) { ; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_1 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1) +; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1) ; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23 ; CHECK-NEXT: store i8 23, ptr [[GEP23]], align 4 ; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 29 @@ -190,7 +190,7 @@ define i8 @select_offsets_not_simplifiable_3(i1 %cnd1, i1 %cnd2) { ; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_3 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, 
i64 noundef 1) +; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1) ; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 ; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 ; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]] @@ -214,7 +214,7 @@ define i8 @select_offsets_not_simplifiable_4(i1 %cnd1, i1 %cnd2) { ; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_4 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1) +; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1) ; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 ; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 ; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]] @@ -445,7 +445,7 @@ define i8 @phi_gep_not_simplifiable_2(i1 %cnd1, i1 %cnd2) { ; CHECK-LABEL: define {{[^@]+}}@phi_gep_not_simplifiable_2 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1) +; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1) ; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23 ; CHECK-NEXT: br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll new file mode 100644 index 0000000000000..6df07a0d68bee --- /dev/null +++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll @@ -0,0 +1,220 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=attributor %s -S | FileCheck %s + +@G = global i32 zeroinitializer, align 4 + 
+declare ptr @get_ptr() +declare noalias ptr @get_noalias_ptr() + +define i32 @test_plain(ptr %ptr) { +; CHECK-LABEL: define i32 @test_plain( +; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + %val = load i32, ptr %ptr, align 4 + ret i32 %val +} + +define i32 @test_noalias_ptr(ptr noalias %ptr) { +; CHECK-LABEL: define i32 @test_noalias_ptr( +; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0:![0-9]+]] +; CHECK-NEXT: ret i32 [[VAL]] +; + %val = load i32, ptr %ptr, align 4 + ret i32 %val +} + +define i32 @test_swap(ptr noalias %ptr, i32 %write) { +; CHECK-LABEL: define i32 @test_swap( +; CHECK-SAME: ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[PTR:%.*]], i32 [[WRITE:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: store i32 [[WRITE]], ptr [[PTR]], align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + %val = load i32, ptr %ptr, align 4 + store i32 %write, ptr %ptr, align 4 + ret i32 %val +} + +define i32 @test_volatile_load(ptr noalias %ptr) { +; CHECK-LABEL: define i32 @test_volatile_load( +; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr [[PTR]], align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + %val = load volatile i32, ptr %ptr, align 4 + ret i32 %val +} + +define i32 @test_atomic_load(ptr noalias %ptr) { +; CHECK-LABEL: define i32 @test_atomic_load( +; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, ptr [[PTR]] unordered, align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + 
%val = load atomic i32, ptr %ptr unordered, align 4 + ret i32 %val +} + +define i32 @test_atomic_volatile_load(ptr noalias %ptr) { +; CHECK-LABEL: define i32 @test_atomic_volatile_load( +; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[VAL:%.*]] = load atomic volatile i32, ptr [[PTR]] unordered, align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + %val = load atomic volatile i32, ptr %ptr unordered, align 4 + ret i32 %val +} + +define i32 @test_global() { +; CHECK-LABEL: define i32 @test_global( +; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr @G, align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + %val = load i32, ptr @G, align 4 + ret i32 %val +} + +define internal i32 @test_internal_noalias_load(ptr %ptr) { +; CHECK-LABEL: define internal i32 @test_internal_noalias_load( +; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]] +; CHECK-NEXT: ret i32 [[VAL]] +; + %val = load i32, ptr %ptr, align 4 + ret i32 %val +} + +define i32 @test_call_internal_noalias(ptr noalias %ptr) { +; CHECK-LABEL: define i32 @test_call_internal_noalias( +; CHECK-SAME: ptr noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: ret i32 [[VAL]] +; + %val = call i32 @test_internal_noalias_load(ptr %ptr) + ret i32 %val +} + +define internal i32 @test_internal_load(ptr %ptr) { +; CHECK-LABEL: define internal i32 @test_internal_load( +; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + %val = load i32, ptr %ptr, align 4 + ret i32 %val +} + +define 
i32 @test_call_internal(ptr %ptr) { +; CHECK-LABEL: define i32 @test_call_internal( +; CHECK-SAME: ptr nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4]] +; CHECK-NEXT: ret i32 [[VAL]] +; + %val = call i32 @test_internal_load(ptr %ptr) + ret i32 %val +} + +define i32 @test_call_ptr() { +; CHECK-LABEL: define i32 @test_call_ptr() { +; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_ptr() +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + %ptr = call ptr @get_ptr() + %val = load i32, ptr %ptr, align 4 + ret i32 %val +} + +define i32 @test_call_noalias_ptr() { +; CHECK-LABEL: define i32 @test_call_noalias_ptr() { +; CHECK-NEXT: [[PTR:%.*]] = call noalias ptr @get_noalias_ptr() +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]] +; CHECK-NEXT: ret i32 [[VAL]] +; + %ptr = call ptr @get_noalias_ptr() + %val = load i32, ptr %ptr, align 4 + ret i32 %val +} + +define i32 @test_selected_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) { +; CHECK-LABEL: define i32 @test_selected_load( +; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]] +; CHECK-NEXT: ret i32 [[VAL]] +; + %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false + %val = load i32, ptr %ptr, align 4 + ret i32 %val +} + +define i32 @test_selected_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) { +; CHECK-LABEL: define i32 @test_selected_load_partial_noalias( +; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly captures(none) 
[[PTR_FALSE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false + %val = load i32, ptr %ptr, align 4 + ret i32 %val +} + +define i32 @test_branch_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) { +; CHECK-LABEL: define i32 @test_branch_load( +; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] +; CHECK: [[TRUE]]: +; CHECK-NEXT: br label %[[FINISH:.*]] +; CHECK: [[FALSE]]: +; CHECK-NEXT: br label %[[FINISH]] +; CHECK: [[FINISH]]: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]] +; CHECK-NEXT: ret i32 [[VAL]] +; +entry: + br i1 %cond, label %true, label %false +true: + br label %finish +false: + br label %finish +finish: + %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ] + %val = load i32, ptr %ptr, align 4 + ret i32 %val +} + +define i32 @test_branch_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) { +; CHECK-LABEL: define i32 @test_branch_load_partial_noalias( +; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] +; CHECK: [[TRUE]]: +; CHECK-NEXT: br label %[[FINISH:.*]] +; CHECK: [[FALSE]]: +; CHECK-NEXT: br label %[[FINISH]] +; CHECK: [[FINISH]]: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] +; CHECK-NEXT: 
[[VAL:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; +entry: + br i1 %cond, label %true, label %false +true: + br label %finish +false: + br label %finish +finish: + %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ] + %val = load i32, ptr %ptr, align 4 + ret i32 %val +} +;. +; CHECK: [[META0]] = !{} +;. From e095a93c82fad7530d152b1888131feb1d1133f4 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Wed, 28 May 2025 12:42:15 -0500 Subject: [PATCH 2/9] Incorporated feedback --- llvm/include/llvm/Transforms/IPO/Attributor.h | 1 + .../Transforms/IPO/AttributorAttributes.cpp | 128 ++++--- .../multiple-offsets-pointer-info.ll | 8 +- .../Attributor/tag-invariant-loads.ll | 357 ++++++++++-------- 4 files changed, 268 insertions(+), 226 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 53fa7a04dc5b5..38996bb051328 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6298,6 +6298,7 @@ struct AAInvariantLoadPointer : public AbstractAttribute { static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) { if (!IRP.getAssociatedType()->isPointerTy()) return false; + return AbstractAttribute::isValidIRPositionForInit(A, IRP); } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index f0647747d6c7f..dec36b3e7dcb3 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12579,7 +12579,7 @@ struct AAInvariantLoadPointerImpl ChangeStatus updateImpl(Attributor &A) override { if (isKnownInvariant()) - return ChangeStatus::UNCHANGED; + return indicateOptimisticFixpoint(); ChangeStatus Changed = ChangeStatus::UNCHANGED; @@ -12605,15 +12605,13 @@ struct AAInvariantLoadPointerImpl const auto *AUO = A.getOrCreateAAFor( getIRPosition(), this, DepClassTy::REQUIRED); - if 
(!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer)) { - removeAssumedBits(IS_INVARIANT); - return ChangeStatus::CHANGED; - } + if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer)) + return indicatePessimisticFixpoint(); if (!UsedAssumedInformation) { // pointer is known (not assumed) to be invariant addKnownBits(IS_INVARIANT); - return ChangeStatus::CHANGED; + return indicateOptimisticFixpoint() | Changed; } return Changed; @@ -12671,24 +12669,44 @@ struct AAInvariantLoadPointerImpl if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS)) return ChangeStatus::UNCHANGED; - const auto *ANoAlias = A.getOrCreateAAFor(getIRPosition(), this, - DepClassTy::REQUIRED); - if (!ANoAlias) - return tryInferNoAlias(A); + const auto *F = getAssociatedFunction(); - if (!ANoAlias->isAssumedNoAlias()) { + if (F && isCallableCC(F->getCallingConv())) { + // program-wide alias information cannot be inferred removeAssumedBits(IS_NOALIAS); return ChangeStatus::CHANGED; } - if (ANoAlias->isKnownNoAlias()) - addKnownBits(IS_NOALIAS); - return ChangeStatus::UNCHANGED; - } + // try to use AANoAlias + if (const auto *ANoAlias = A.getOrCreateAAFor( + getIRPosition(), this, DepClassTy::REQUIRED)) { + if (ANoAlias->isKnownNoAlias()) { + addKnownBits(IS_NOALIAS); + return ChangeStatus::UNCHANGED; + } + + if (!ANoAlias->isAssumedNoAlias()) { + removeAssumedBits(IS_NOALIAS); + return ChangeStatus::CHANGED; + } + + return ChangeStatus::UNCHANGED; + } + + // if the function is not callable, try to infer noalias from argument + // attribute, since it is applicable for the duration of the function + if (const auto *Arg = getAssociatedArgument()) { + if (Arg->hasNoAliasAttr()) { + addKnownBits(IS_NOALIAS); + return ChangeStatus::UNCHANGED; + } + + // noalias information is not provided, and cannot be inferred, + // so we conservatively assume the pointer aliases. 
+ removeAssumedBits(IS_NOALIAS); + return ChangeStatus::CHANGED; + } - /// Fallback method if updateNoAlias fails to infer noalias information from - /// AANoAlias. - virtual ChangeStatus tryInferNoAlias(Attributor &A) { return ChangeStatus::UNCHANGED; } @@ -12696,28 +12714,45 @@ struct AAInvariantLoadPointerImpl if (isKnown(IS_READONLY) || !isAssumed(IS_READONLY)) return ChangeStatus::UNCHANGED; - // AAMemoryBehavior may crash if value is global - if (!getAssociatedFunction()) - return tryInferReadOnly(A); + const auto *F = getAssociatedFunction(); - const auto *AMemoryBehavior = A.getOrCreateAAFor( - getIRPosition(), this, DepClassTy::REQUIRED); - if (!AMemoryBehavior) - return tryInferReadOnly(A); + if (!F) + return ChangeStatus::UNCHANGED; - if (!AMemoryBehavior->isAssumedReadOnly()) { + if (isCallableCC(F->getCallingConv())) { + // readonly attribute is only useful if applicable program-wide removeAssumedBits(IS_READONLY); return ChangeStatus::CHANGED; } - if (AMemoryBehavior->isKnownReadOnly()) - addKnownBits(IS_READONLY); - return ChangeStatus::UNCHANGED; - } + // try to use AAMemoryBehavior to infer readonly attribute + if (const auto *AMemoryBehavior = A.getOrCreateAAFor( + getIRPosition(), this, DepClassTy::REQUIRED)) { + if (!AMemoryBehavior->isAssumedReadOnly()) { + removeAssumedBits(IS_READONLY); + return ChangeStatus::CHANGED; + } + + if (AMemoryBehavior->isKnownReadOnly()) { + addKnownBits(IS_READONLY); + return ChangeStatus::UNCHANGED; + } + + return ChangeStatus::UNCHANGED; + } + + if (const auto *Arg = getAssociatedArgument()) { + if (Arg->onlyReadsMemory()) { + addKnownBits(IS_READONLY); + return ChangeStatus::UNCHANGED; + } + + // readonly information is not provided, and cannot be inferred from + // AAMemoryBehavior + removeAssumedBits(IS_READONLY); + return ChangeStatus::CHANGED; + } - /// Fallback method if updateReadOnly fails to infer readonly information from - /// AAMemoryBehavior. 
- virtual ChangeStatus tryInferReadOnly(Attributor &A) { return ChangeStatus::UNCHANGED; } }; @@ -12741,33 +12776,6 @@ struct AAInvariantLoadPointerCallSiteReturned final struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl { AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A) : AAInvariantLoadPointerImpl(IRP, A) {} - -protected: - ChangeStatus tryInferNoAlias(Attributor &A) override { - const auto *Arg = getAssociatedArgument(); - if (Arg->hasNoAliasAttr()) { - addKnownBits(IS_NOALIAS); - return ChangeStatus::UNCHANGED; - } - - // noalias information is not provided, and cannot be inferred from - // AANoAlias - removeAssumedBits(IS_NOALIAS); - return ChangeStatus::CHANGED; - } - - ChangeStatus tryInferReadOnly(Attributor &A) override { - const auto *Arg = getAssociatedArgument(); - if (Arg->onlyReadsMemory()) { - addKnownBits(IS_READONLY); - return ChangeStatus::UNCHANGED; - } - - // readonly information is not provided, and cannot be inferred from - // AAMemoryBehavior - removeAssumedBits(IS_READONLY); - return ChangeStatus::CHANGED; - } }; struct AAInvariantLoadPointerCallSiteArgument final diff --git a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll index 9e58a35107491..f04ac4d73340f 100644 --- a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll @@ -10,7 +10,7 @@ define i8 @select_offsets_simplifiable_1(i1 %cnd1, i1 %cnd2) { ; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_1 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1) +; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1) ; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23 ; CHECK-NEXT: store i8 23, ptr [[GEP23]], 
align 4 ; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 29 @@ -190,7 +190,7 @@ define i8 @select_offsets_not_simplifiable_3(i1 %cnd1, i1 %cnd2) { ; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_3 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1) +; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1) ; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 ; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 ; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]] @@ -214,7 +214,7 @@ define i8 @select_offsets_not_simplifiable_4(i1 %cnd1, i1 %cnd2) { ; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_4 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1) +; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1) ; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 ; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 ; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]] @@ -445,7 +445,7 @@ define i8 @phi_gep_not_simplifiable_2(i1 %cnd1, i1 %cnd2) { ; CHECK-LABEL: define {{[^@]+}}@phi_gep_not_simplifiable_2 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1) +; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1) ; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23 ; CHECK-NEXT: br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll 
b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll index 6df07a0d68bee..02c304822bcb8 100644 --- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll +++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll @@ -1,220 +1,253 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=attributor %s -S | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-hsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN -@G = global i32 zeroinitializer, align 4 +@G = addrspace(1) global i32 zeroinitializer, align 4 +declare void @clobber(i32) +declare ptr addrspace(1) @get_ptr() +declare noalias ptr addrspace(1) @get_noalias_ptr() -declare ptr @get_ptr() -declare noalias ptr @get_noalias_ptr() - -define i32 @test_plain(ptr %ptr) { -; CHECK-LABEL: define i32 @test_plain( -; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: ret i32 [[VAL]] +define void @test_nonkernel(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define void @test_nonkernel( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void ; - %val = load i32, ptr %ptr, align 4 - ret i32 %val + %val = load i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void } -define i32 @test_noalias_ptr(ptr noalias %ptr) { -; CHECK-LABEL: define i32 @test_noalias_ptr( -; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0:![0-9]+]] -; CHECK-NEXT: ret i32 [[VAL]] +define amdgpu_kernel void @test_plain(ptr addrspace(1) 
%ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_plain( +; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void ; - %val = load i32, ptr %ptr, align 4 - ret i32 %val + %val = load i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void } -define i32 @test_swap(ptr noalias %ptr, i32 %write) { -; CHECK-LABEL: define i32 @test_swap( -; CHECK-SAME: ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[PTR:%.*]], i32 [[WRITE:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: store i32 [[WRITE]], ptr [[PTR]], align 4 -; CHECK-NEXT: ret i32 [[VAL]] +define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void ; - %val = load i32, ptr %ptr, align 4 - store i32 %write, ptr %ptr, align 4 - ret i32 %val + %val = load i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void } -define i32 @test_volatile_load(ptr noalias %ptr) { -; CHECK-LABEL: define i32 @test_volatile_load( -; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr [[PTR]], align 4 -; CHECK-NEXT: ret i32 [[VAL]] +define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_swap( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 
captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void ; - %val = load volatile i32, ptr %ptr, align 4 - ret i32 %val + %val = load i32, ptr addrspace(1) %ptr, align 4 + store i32 %swap, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void } -define i32 @test_atomic_load(ptr noalias %ptr) { -; CHECK-LABEL: define i32 @test_atomic_load( -; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, ptr [[PTR]] unordered, align 4 -; CHECK-NEXT: ret i32 [[VAL]] +define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) { +; AMDGCN-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void ; - %val = load atomic i32, ptr %ptr unordered, align 4 - ret i32 %val + %val = load volatile i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void } -define i32 @test_atomic_volatile_load(ptr noalias %ptr) { -; CHECK-LABEL: define i32 @test_atomic_volatile_load( -; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[VAL:%.*]] = load atomic volatile i32, ptr [[PTR]] unordered, align 4 -; CHECK-NEXT: ret i32 [[VAL]] +define amdgpu_kernel void @test_atomic(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_atomic( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) { +; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr 
addrspace(1) [[PTR]] unordered, align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void ; - %val = load atomic volatile i32, ptr %ptr unordered, align 4 - ret i32 %val + %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4 + call void @clobber(i32 %val) + ret void } -define i32 @test_global() { -; CHECK-LABEL: define i32 @test_global( -; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr @G, align 4 -; CHECK-NEXT: ret i32 [[VAL]] +define amdgpu_kernel void @test_global() { +; AMDGCN-LABEL: define amdgpu_kernel void @test_global() { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void ; - %val = load i32, ptr @G, align 4 - ret i32 %val + %val = load i32, ptr addrspace(1) @G, align 4 + call void @clobber(i32 %val) + ret void } -define internal i32 @test_internal_noalias_load(ptr %ptr) { -; CHECK-LABEL: define internal i32 @test_internal_noalias_load( -; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]] -; CHECK-NEXT: ret i32 [[VAL]] +define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: ret i32 [[VAL]] ; - %val = load i32, ptr %ptr, align 4 - ret i32 %val + %val = load i32, ptr addrspace(1) %ptr, align 4 + ret i32 %val +} + +define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias( +; AMDGCN-SAME: ptr addrspace(1) 
noalias nofree readonly captures(none) [[PTR:%.*]]) { +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR1:[0-9]+]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void +; + %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) + call void @clobber(i32 %val) + ret void } -define i32 @test_call_internal_noalias(ptr noalias %ptr) { -; CHECK-LABEL: define i32 @test_call_internal_noalias( -; CHECK-SAME: ptr noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4:[0-9]+]] -; CHECK-NEXT: ret i32 [[VAL]] -; - %val = call i32 @test_internal_noalias_load(ptr %ptr) - ret i32 %val -} +define internal i32 @test_internal_load(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define internal i32 @test_internal_load( +; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: ret i32 [[VAL]] +; + %val = load i32, ptr addrspace(1) %ptr, align 4 + ret i32 %val +} -define internal i32 @test_internal_load(ptr %ptr) { -; CHECK-LABEL: define internal i32 @test_internal_load( -; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: ret i32 [[VAL]] -; - %val = load i32, ptr %ptr, align 4 - ret i32 %val -} +define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal( +; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) { +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly 
align 4 captures(none) [[PTR]]) #[[ATTR1]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void +; + %val = call i32 @test_internal_load(ptr addrspace(1) %ptr) + call void @clobber(i32 %val) + ret void +} -define i32 @test_call_internal(ptr %ptr) { -; CHECK-LABEL: define i32 @test_call_internal( -; CHECK-SAME: ptr nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4]] -; CHECK-NEXT: ret i32 [[VAL]] -; - %val = call i32 @test_internal_load(ptr %ptr) - ret i32 %val +define amdgpu_kernel void @test_call_ptr() { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr() { +; AMDGCN-NEXT: [[PTR:%.*]] = call ptr addrspace(1) @get_ptr() +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void +; + %ptr = call ptr addrspace(1) @get_ptr() + %val = load i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void } -define i32 @test_call_ptr() { -; CHECK-LABEL: define i32 @test_call_ptr() { -; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_ptr() -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: ret i32 [[VAL]] +define amdgpu_kernel void @test_call_noalias_ptr() { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr() { +; AMDGCN-NEXT: [[PTR:%.*]] = call ptr addrspace(1) @get_noalias_ptr() +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void ; - %ptr = call ptr @get_ptr() - %val = load i32, ptr %ptr, align 4 - ret i32 %val -} - -define i32 @test_call_noalias_ptr() { -; CHECK-LABEL: define i32 @test_call_noalias_ptr() { -; CHECK-NEXT: [[PTR:%.*]] = call noalias ptr @get_noalias_ptr() -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]] -; CHECK-NEXT: ret i32 [[VAL]] 
-; - %ptr = call ptr @get_noalias_ptr() - %val = load i32, ptr %ptr, align 4 - ret i32 %val + %ptr = call ptr addrspace(1) @get_noalias_ptr() + %val = load i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void } -define i32 @test_selected_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) { -; CHECK-LABEL: define i32 @test_selected_load( -; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]] -; CHECK-NEXT: ret i32 [[VAL]] -; - %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false - %val = load i32, ptr %ptr, align 4 - ret i32 %val +define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load( +; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) { +; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void +; + %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false + %val = load i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void } -define i32 @test_selected_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) { -; CHECK-LABEL: define i32 @test_selected_load_partial_noalias( -; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly 
captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: ret i32 [[VAL]] +define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias( +; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) { +; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void ; - %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false - %val = load i32, ptr %ptr, align 4 - ret i32 %val + %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false + %val = load i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void } -define i32 @test_branch_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) { -; CHECK-LABEL: define i32 @test_branch_load( -; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] -; CHECK: [[TRUE]]: -; CHECK-NEXT: br label %[[FINISH:.*]] -; CHECK: [[FALSE]]: -; CHECK-NEXT: br label %[[FINISH]] -; CHECK: [[FINISH]]: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]] -; CHECK-NEXT: ret i32 [[VAL]] +define amdgpu_kernel void @test_branch_load(i1 %cond, ptr 
addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load( +; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) { +; AMDGCN-NEXT: [[ENTRY:.*:]] +; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] +; AMDGCN: [[TRUE]]: +; AMDGCN-NEXT: call void @clobber(i32 noundef 1) +; AMDGCN-NEXT: br label %[[FINISH:.*]] +; AMDGCN: [[FALSE]]: +; AMDGCN-NEXT: br label %[[FINISH]] +; AMDGCN: [[FINISH]]: +; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void ; entry: br i1 %cond, label %true, label %false true: + call void @clobber(i32 1) br label %finish false: br label %finish finish: - %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ] - %val = load i32, ptr %ptr, align 4 - ret i32 %val -} - -define i32 @test_branch_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) { -; CHECK-LABEL: define i32 @test_branch_load_partial_noalias( -; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] -; CHECK: [[TRUE]]: -; CHECK-NEXT: br label %[[FINISH:.*]] -; CHECK: [[FALSE]]: -; CHECK-NEXT: br label %[[FINISH]] -; CHECK: [[FINISH]]: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: ret i32 [[VAL]] + %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ] + %val = load i32, 
ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias( +; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) { +; AMDGCN-NEXT: [[ENTRY:.*:]] +; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] +; AMDGCN: [[TRUE]]: +; AMDGCN-NEXT: call void @clobber(i32 noundef 1) +; AMDGCN-NEXT: br label %[[FINISH:.*]] +; AMDGCN: [[FALSE]]: +; AMDGCN-NEXT: br label %[[FINISH]] +; AMDGCN: [[FINISH]]: +; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: ret void ; entry: br i1 %cond, label %true, label %false true: + call void @clobber(i32 1) br label %finish false: br label %finish finish: - %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ] - %val = load i32, ptr %ptr, align 4 - ret i32 %val + %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ] + %val = load i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void } ;. -; CHECK: [[META0]] = !{} +; AMDGCN: [[META0]] = !{} ;. From ef97544e9bc31e61c84e7d1e8b044ac3a61ca164 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Thu, 29 May 2025 16:02:57 -0500 Subject: [PATCH 3/9] Added guards for side-effects on loads. "Side effects" include volatile loads and atomic loads that are at least monotonic. 
--- .../Transforms/IPO/AttributorAttributes.cpp | 79 +++++----- .../Attributor/tag-invariant-loads.ll | 139 +++++++++++------- 2 files changed, 135 insertions(+), 83 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index dec36b3e7dcb3..b178cc5951e3d 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12542,39 +12542,44 @@ struct AAInvariantLoadPointerImpl : public StateWrapper, AAInvariantLoadPointer, uint8_t> { // load invariance is implied by, but not equivalent to IS_NOALIAS | - // IS_READONLY, as load invariance is also implied by all underlying objects + // IS_NOEFFECT, as load invariance is also implied by all underlying objects // being load invariant. // - // IS_INVARIANT is set to indicate that the contents of the pointer are - // *known* to be invariant. + // IS_KNOWN_INVARIANT is set to indicate that the contents of the pointer are + // *known* to be invariant, and is therefore a pessimistic bit. 
enum { - IS_INVARIANT = 1 << 0, + IS_KNOWN_INVARIANT = 1 << 0, IS_NOALIAS = 1 << 1, - IS_READONLY = 1 << 2, + IS_NOEFFECT = 1 << 2, + + IS_IMPLIED_INVARIANT = IS_NOALIAS | IS_NOEFFECT, }; - static_assert(getBestState() == (IS_INVARIANT | IS_NOALIAS | IS_READONLY), + static_assert(getBestState() == (IS_KNOWN_INVARIANT | IS_IMPLIED_INVARIANT), "Unexpected best state!"); using Base = StateWrapper, AAInvariantLoadPointer, uint8_t>; - // the BitIntegerState is optimistic about noalias and readonly, but - // pessimistic about invariance + // the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but + // pessimistic about IS_KNOWN_INVARIANT AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A) - : Base(IRP, IS_NOALIAS | IS_READONLY) {} + : Base(IRP, IS_IMPLIED_INVARIANT) {} void initialize(Attributor &A) final { - // conservatively assume that the pointer's contents are not invariant, - // until proven otherwise. - removeAssumedBits(IS_INVARIANT); + removeAssumedBits(IS_KNOWN_INVARIANT); } bool isKnownInvariant() const final { - return isKnown(IS_INVARIANT) || isKnown(IS_NOALIAS | IS_READONLY); + return isKnown(IS_KNOWN_INVARIANT) || isKnown(IS_IMPLIED_INVARIANT); } bool isAssumedInvariant() const final { - return isAssumed(IS_INVARIANT) || isAssumed(IS_NOALIAS | IS_READONLY); + if (isAssumed(IS_KNOWN_INVARIANT) || isAssumed(IS_IMPLIED_INVARIANT)) + return true; + // if the function is callable, optimistically assume that invariance can be + // inferred from the caller + const auto *F = getAssociatedFunction(); + return F && isCallableCC(F->getCallingConv()); } ChangeStatus updateImpl(Attributor &A) override { @@ -12583,8 +12588,12 @@ struct AAInvariantLoadPointerImpl ChangeStatus Changed = ChangeStatus::UNCHANGED; - Changed |= updateNoAlias(A); - Changed |= updateReadOnly(A); + Changed |= checkNoAlias(A); + Changed |= checkNoEffect(A); + + // try to infer invariance from underlying objects + const auto *AUO = A.getOrCreateAAFor( + 
getIRPosition(), this, DepClassTy::REQUIRED); bool UsedAssumedInformation = false; const auto IsInvariantLoadIfPointer = [&](const Value &V) { @@ -12601,16 +12610,12 @@ struct AAInvariantLoadPointerImpl UsedAssumedInformation = true; return true; }; - - const auto *AUO = A.getOrCreateAAFor( - getIRPosition(), this, DepClassTy::REQUIRED); - if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer)) return indicatePessimisticFixpoint(); if (!UsedAssumedInformation) { // pointer is known (not assumed) to be invariant - addKnownBits(IS_INVARIANT); + addKnownBits(IS_KNOWN_INVARIANT); return indicateOptimisticFixpoint() | Changed; } @@ -12639,8 +12644,6 @@ struct AAInvariantLoadPointerImpl return true; if (auto *LI = dyn_cast(I)) { - if (LI->isVolatile() || LI->isAtomic()) - return true; LI->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(LI->getContext(), {})); @@ -12664,8 +12667,8 @@ struct AAInvariantLoadPointerImpl /// See AbstractAttribute::trackStatistics(). void trackStatistics() const override {} -protected: - ChangeStatus updateNoAlias(Attributor &A) { +private: + ChangeStatus checkNoAlias(Attributor &A) { if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS)) return ChangeStatus::UNCHANGED; @@ -12710,8 +12713,8 @@ struct AAInvariantLoadPointerImpl return ChangeStatus::UNCHANGED; } - ChangeStatus updateReadOnly(Attributor &A) { - if (isKnown(IS_READONLY) || !isAssumed(IS_READONLY)) + ChangeStatus checkNoEffect(Attributor &A) { + if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT)) return ChangeStatus::UNCHANGED; const auto *F = getAssociatedFunction(); @@ -12720,8 +12723,18 @@ struct AAInvariantLoadPointerImpl return ChangeStatus::UNCHANGED; if (isCallableCC(F->getCallingConv())) { - // readonly attribute is only useful if applicable program-wide - removeAssumedBits(IS_READONLY); + // effects cannot be tracked outside of function call; + // conservatively assume pointer has effectful uses + removeAssumedBits(IS_NOEFFECT); + return ChangeStatus::CHANGED; + 
} + + const auto HasNoSideEffects = [](const Use &U, bool &) { + const auto *I = dyn_cast(U.getUser()); + return !I || !I->mayHaveSideEffects(); + }; + if (!A.checkForAllUses(HasNoSideEffects, *this, getAssociatedValue())) { + removeAssumedBits(IS_NOEFFECT); return ChangeStatus::CHANGED; } @@ -12729,12 +12742,12 @@ struct AAInvariantLoadPointerImpl if (const auto *AMemoryBehavior = A.getOrCreateAAFor( getIRPosition(), this, DepClassTy::REQUIRED)) { if (!AMemoryBehavior->isAssumedReadOnly()) { - removeAssumedBits(IS_READONLY); + removeAssumedBits(IS_NOEFFECT); return ChangeStatus::CHANGED; } if (AMemoryBehavior->isKnownReadOnly()) { - addKnownBits(IS_READONLY); + addKnownBits(IS_NOEFFECT); return ChangeStatus::UNCHANGED; } @@ -12743,13 +12756,13 @@ struct AAInvariantLoadPointerImpl if (const auto *Arg = getAssociatedArgument()) { if (Arg->onlyReadsMemory()) { - addKnownBits(IS_READONLY); + addKnownBits(IS_NOEFFECT); return ChangeStatus::UNCHANGED; } // readonly information is not provided, and cannot be inferred from // AAMemoryBehavior - removeAssumedBits(IS_READONLY); + removeAssumedBits(IS_NOEFFECT); return ChangeStatus::CHANGED; } diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll index 02c304822bcb8..b73e6ffafbe4a 100644 --- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll +++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll @@ -1,40 +1,42 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=amdgcn-amd-hsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN +; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN @G = addrspace(1) global i32 zeroinitializer, align 4 -declare void @clobber(i32) -declare ptr addrspace(1) @get_ptr() -declare noalias ptr addrspace(1) @get_noalias_ptr() +declare void @clobber(i32) #0 +declare ptr addrspace(1) @get_ptr() #0 
+attributes #0 = { nofree norecurse nosync nounwind willreturn } define void @test_nonkernel(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define void @test_nonkernel( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4:[0-9]+]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; may not be !invariant.load, as the caller may modify %ptr call void @clobber(i32 %val) ret void } define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_plain( -; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) { +; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; may not be !invariant.load, as %ptr may alias a pointer in @clobber call void @clobber(i32 %val) ret void } define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load 
i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) %ptr, align 4 @@ -44,13 +46,14 @@ define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) { define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_swap( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR1]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 ; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; cannot be !invariant.load due to the write to %ptr store i32 %swap, ptr addrspace(1) %ptr, align 4 call void @clobber(i32 %val) ret void @@ -58,21 +61,22 @@ define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %s define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; %val = load volatile i32, ptr addrspace(1) %ptr, align 4 + ;; volatiles loads cannot be !invariant.load call void 
@clobber(i32 %val) ret void } -define amdgpu_kernel void @test_atomic(ptr addrspace(1) noalias %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_atomic( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) { -; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4 @@ -80,32 +84,48 @@ define amdgpu_kernel void @test_atomic(ptr addrspace(1) noalias %ptr) { ret void } +define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: ret void +; + %val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4 + ;; atomic loads with ordering guarantees may have side effects + call void @clobber(i32 %val) + ret void +} + define amdgpu_kernel void @test_global() { -; AMDGCN-LABEL: define amdgpu_kernel void @test_global() { +; AMDGCN-LABEL: define amdgpu_kernel void @test_global( +; AMDGCN-SAME: ) #[[ATTR1]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4 -; AMDGCN-NEXT: call void 
@clobber(i32 [[VAL]]) +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) @G, align 4 + ;; is not an !invariant.load as global variables may change call void @clobber(i32 %val) ret void } define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) { ; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] ; AMDGCN-NEXT: ret i32 [[VAL]] ; %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; is an !invariant.load due to its only caller @test_call_internal_noalias ret i32 %val } define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) { -; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR1:[0-9]+]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] { +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5:[0-9]+]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) @@ -115,19 +135,20 @@ define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias % define internal i32 @test_internal_load(ptr addrspace(1) 
%ptr) { ; AMDGCN-LABEL: define internal i32 @test_internal_load( -; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0]] { +; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 ; AMDGCN-NEXT: ret i32 [[VAL]] ; %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; may not be an !invariant.load since the pointer in @test_call_internal may alias ret i32 %val } define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal( -; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) { -; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR1]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] { +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; %val = call i32 @test_internal_load(ptr addrspace(1) %ptr) @@ -135,74 +156,90 @@ define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) { ret void } -define amdgpu_kernel void @test_call_ptr() { -; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr() { -; AMDGCN-NEXT: [[PTR:%.*]] = call ptr addrspace(1) @get_ptr() +define internal i32 @test_internal_written(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define internal i32 @test_internal_written( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; 
AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) -; AMDGCN-NEXT: ret void +; AMDGCN-NEXT: ret i32 [[VAL]] ; - %ptr = call ptr addrspace(1) @get_ptr() %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; cannot be an !invariant.load because of the write in caller @test_call_internal_written + ret i32 %val +} + +define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR1]] { +; AMDGCN-NEXT: store i32 [[X]], ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: ret void +; + store i32 %x, ptr addrspace(1) %ptr + %val = call i32 @test_internal_written(ptr addrspace(1) %ptr) call void @clobber(i32 %val) ret void } -define amdgpu_kernel void @test_call_noalias_ptr() { -; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr() { -; AMDGCN-NEXT: [[PTR:%.*]] = call ptr addrspace(1) @get_noalias_ptr() +define amdgpu_kernel void @test_call_ptr() { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr( +; AMDGCN-SAME: ) #[[ATTR1]] { +; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR4]] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; - %ptr = call ptr addrspace(1) @get_noalias_ptr() + %ptr = call ptr addrspace(1) @get_ptr() %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; may not be an !invariant.load since %ptr may alias call void @clobber(i32 %val) ret void } define amdgpu_kernel void 
@test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load( -; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) { +; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] { ; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; either pointer yields an !invariant.load call void @clobber(i32 %val) ret void } define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias( -; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) { +; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] { ; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: call void @clobber(i32 
[[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; %ptr.false may alias, so no !invariant.load call void @clobber(i32 %val) ret void } define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load( -; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) { +; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] { ; AMDGCN-NEXT: [[ENTRY:.*:]] ; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] ; AMDGCN: [[TRUE]]: -; AMDGCN-NEXT: call void @clobber(i32 noundef 1) +; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR4]] ; AMDGCN-NEXT: br label %[[FINISH:.*]] ; AMDGCN: [[FALSE]]: ; AMDGCN-NEXT: br label %[[FINISH]] ; AMDGCN: [[FINISH]]: ; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; entry: @@ -215,24 +252,25 @@ false: finish: %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ] %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; either pointer yields an !invariant.load call void @clobber(i32 %val) ret void } define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) { ; AMDGCN-LABEL: define amdgpu_kernel void 
@test_branch_load_partial_noalias( -; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) { +; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] { ; AMDGCN-NEXT: [[ENTRY:.*:]] ; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] ; AMDGCN: [[TRUE]]: -; AMDGCN-NEXT: call void @clobber(i32 noundef 1) +; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR4]] ; AMDGCN-NEXT: br label %[[FINISH:.*]] ; AMDGCN: [[FALSE]]: ; AMDGCN-NEXT: br label %[[FINISH]] ; AMDGCN: [[FINISH]]: ; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] ; AMDGCN-NEXT: ret void ; entry: @@ -245,6 +283,7 @@ false: finish: %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ] %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; ptr.false may alias, so no !invariant.load call void @clobber(i32 %val) ret void } From fe750fd8a26093fedef3c791e264f3fa1f1415df Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Mon, 2 Jun 2025 21:23:47 -0500 Subject: [PATCH 4/9] Corrected and refactored attributor logic. 
--- llvm/include/llvm/Transforms/IPO/Attributor.h | 2 + .../Transforms/IPO/AttributorAttributes.cpp | 172 +++++++++++------- .../Attributor/tag-invariant-loads.ll | 118 +++++++----- 3 files changed, 179 insertions(+), 113 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 38996bb051328..55be0838d464a 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6308,9 +6308,11 @@ struct AAInvariantLoadPointer : public AbstractAttribute { /// Return true if the pointer's contents are known to remain invariant. virtual bool isKnownInvariant() const = 0; + virtual bool isKnownLocallyInvariant() const = 0; /// Return true if the pointer's contents are assumed to remain invariant. virtual bool isAssumedInvariant() const = 0; + virtual bool isAssumedLocallyInvariant() const = 0; /// See AbstractAttribute::getName(). StringRef getName() const override { return "AAInvariantLoadPointer"; } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index b178cc5951e3d..cfe7611276feb 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12539,47 +12539,49 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo { namespace { struct AAInvariantLoadPointerImpl - : public StateWrapper, AAInvariantLoadPointer, - uint8_t> { - // load invariance is implied by, but not equivalent to IS_NOALIAS | - // IS_NOEFFECT, as load invariance is also implied by all underlying objects - // being load invariant. - // - // IS_KNOWN_INVARIANT is set to indicate that the contents of the pointer are - // *known* to be invariant, and is therefore a pessimistic bit. 
- enum { - IS_KNOWN_INVARIANT = 1 << 0, - IS_NOALIAS = 1 << 1, - IS_NOEFFECT = 1 << 2, + : public StateWrapper, + AAInvariantLoadPointer> { - IS_IMPLIED_INVARIANT = IS_NOALIAS | IS_NOEFFECT, + enum { + // pointer does not alias within the bounds of the function + IS_NOALIAS = 1 << 0, + // pointer is not involved in any effectful instructions within the bounds + // of the function + IS_NOEFFECT = 1 << 1, + // loads are invariant within the bounds of the function + IS_LOCALLY_INVARIANT = 1 << 2, + // memory lifetime is constrained within the bounds of the function + IS_LOCALLY_CONSTRAINED = 1 << 3, + + IS_BEST_STATE = IS_NOALIAS | IS_NOEFFECT | IS_LOCALLY_INVARIANT | + IS_LOCALLY_CONSTRAINED, }; - static_assert(getBestState() == (IS_KNOWN_INVARIANT | IS_IMPLIED_INVARIANT), - "Unexpected best state!"); + static_assert(getBestState() == IS_BEST_STATE, "Unexpected best state"); - using Base = StateWrapper, AAInvariantLoadPointer, - uint8_t>; + using Base = + StateWrapper, AAInvariantLoadPointer>; // the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but // pessimistic about IS_KNOWN_INVARIANT AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A) - : Base(IRP, IS_IMPLIED_INVARIANT) {} - - void initialize(Attributor &A) final { - removeAssumedBits(IS_KNOWN_INVARIANT); - } + : Base(IRP) {} bool isKnownInvariant() const final { - return isKnown(IS_KNOWN_INVARIANT) || isKnown(IS_IMPLIED_INVARIANT); + return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED); + } + bool isKnownLocallyInvariant() const final { + if (isKnown(IS_LOCALLY_INVARIANT)) + return true; + return isKnown(IS_NOALIAS | IS_NOEFFECT); } bool isAssumedInvariant() const final { - if (isAssumed(IS_KNOWN_INVARIANT) || isAssumed(IS_IMPLIED_INVARIANT)) + return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED); + } + bool isAssumedLocallyInvariant() const final { + if (isAssumed(IS_LOCALLY_INVARIANT)) return true; - // if the function is callable, 
optimistically assume that invariance can be - // inferred from the caller - const auto *F = getAssociatedFunction(); - return F && isCallableCC(F->getCallingConv()); + return isAssumed(IS_NOALIAS | IS_NOEFFECT); } ChangeStatus updateImpl(Attributor &A) override { @@ -12589,6 +12591,9 @@ struct AAInvariantLoadPointerImpl ChangeStatus Changed = ChangeStatus::UNCHANGED; Changed |= checkNoAlias(A); + if (requiresNoAlias() && !isAssumed(IS_NOALIAS)) + return indicatePessimisticFixpoint(); + Changed |= checkNoEffect(A); // try to infer invariance from underlying objects @@ -12602,9 +12607,9 @@ struct AAInvariantLoadPointerImpl const auto *IsInvariantLoadPointer = A.getOrCreateAAFor(IRPosition::value(V), this, DepClassTy::REQUIRED); - if (IsInvariantLoadPointer->isKnownInvariant()) + if (IsInvariantLoadPointer->isKnownLocallyInvariant()) return true; - if (!IsInvariantLoadPointer->isAssumedInvariant()) + if (!IsInvariantLoadPointer->isAssumedLocallyInvariant()) return false; UsedAssumedInformation = true; @@ -12614,9 +12619,9 @@ struct AAInvariantLoadPointerImpl return indicatePessimisticFixpoint(); if (!UsedAssumedInformation) { - // pointer is known (not assumed) to be invariant - addKnownBits(IS_KNOWN_INVARIANT); - return indicateOptimisticFixpoint() | Changed; + // pointer is known (not assumed) to be locally invariant + addKnownBits(IS_LOCALLY_INVARIANT); + return Changed; } return Changed; @@ -12658,28 +12663,31 @@ struct AAInvariantLoadPointerImpl /// See AbstractAttribute::getAsStr(). const std::string getAsStr(Attributor *) const override { - std::string Str; - raw_string_ostream OS(Str); - OS << "load invariant pointer: " << isKnown() << '\n'; - return Str; + if (isKnownInvariant()) + return "load-invariant pointer"; + return "non-invariant pointer"; } /// See AbstractAttribute::trackStatistics(). void trackStatistics() const override {} +protected: + /// Indicate that invariance necessarily requires the pointer to be noalias. 
+ virtual bool requiresNoAlias() const { return false; } + private: + bool isExternal() const { + const auto *F = getAssociatedFunction(); + if (!F) + return true; + return isCallableCC(F->getCallingConv()) && + getPositionKind() != IRP_CALL_SITE_RETURNED; + } + ChangeStatus checkNoAlias(Attributor &A) { if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS)) return ChangeStatus::UNCHANGED; - const auto *F = getAssociatedFunction(); - - if (F && isCallableCC(F->getCallingConv())) { - // program-wide alias information cannot be inferred - removeAssumedBits(IS_NOALIAS); - return ChangeStatus::CHANGED; - } - // try to use AANoAlias if (const auto *ANoAlias = A.getOrCreateAAFor( getIRPosition(), this, DepClassTy::REQUIRED)) { @@ -12696,8 +12704,8 @@ struct AAInvariantLoadPointerImpl return ChangeStatus::UNCHANGED; } - // if the function is not callable, try to infer noalias from argument - // attribute, since it is applicable for the duration of the function + // try to infer noalias from argument attribute, since it is applicable for + // the duration of the function if (const auto *Arg = getAssociatedArgument()) { if (Arg->hasNoAliasAttr()) { addKnownBits(IS_NOALIAS); @@ -12717,34 +12725,23 @@ struct AAInvariantLoadPointerImpl if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT)) return ChangeStatus::UNCHANGED; - const auto *F = getAssociatedFunction(); - - if (!F) - return ChangeStatus::UNCHANGED; + if (!getAssociatedFunction()) + return indicatePessimisticFixpoint(); - if (isCallableCC(F->getCallingConv())) { - // effects cannot be tracked outside of function call; - // conservatively assume pointer has effectful uses - removeAssumedBits(IS_NOEFFECT); - return ChangeStatus::CHANGED; - } + const auto HasNoEffectLoads = [&](const Use &U, bool &) { + if (const auto *LI = dyn_cast(U.getUser())) + return !LI->mayHaveSideEffects(); - const auto HasNoSideEffects = [](const Use &U, bool &) { - const auto *I = dyn_cast(U.getUser()); - return !I || !I->mayHaveSideEffects(); + 
return true; }; - if (!A.checkForAllUses(HasNoSideEffects, *this, getAssociatedValue())) { - removeAssumedBits(IS_NOEFFECT); - return ChangeStatus::CHANGED; - } + if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue())) + return indicatePessimisticFixpoint(); // try to use AAMemoryBehavior to infer readonly attribute if (const auto *AMemoryBehavior = A.getOrCreateAAFor( getIRPosition(), this, DepClassTy::REQUIRED)) { - if (!AMemoryBehavior->isAssumedReadOnly()) { - removeAssumedBits(IS_NOEFFECT); - return ChangeStatus::CHANGED; - } + if (!AMemoryBehavior->isAssumedReadOnly()) + return indicatePessimisticFixpoint(); if (AMemoryBehavior->isKnownReadOnly()) { addKnownBits(IS_NOEFFECT); @@ -12762,8 +12759,7 @@ struct AAInvariantLoadPointerImpl // readonly information is not provided, and cannot be inferred from // AAMemoryBehavior - removeAssumedBits(IS_NOEFFECT); - return ChangeStatus::CHANGED; + return indicatePessimisticFixpoint(); } return ChangeStatus::UNCHANGED; @@ -12778,17 +12774,53 @@ struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl { struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl { AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A) : AAInvariantLoadPointerImpl(IRP, A) {} + + void initialize(Attributor &) override { + removeAssumedBits(IS_LOCALLY_CONSTRAINED); + } }; struct AAInvariantLoadPointerCallSiteReturned final : AAInvariantLoadPointerImpl { AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A) : AAInvariantLoadPointerImpl(IRP, A) {} + + void initialize(Attributor &A) override { + const auto *F = getAssociatedFunction(); + assert(F && "no associated function for return from call"); + + // not much we can say about opaque functions + if (F->isDeclaration() || F->isIntrinsic()) { + if (!F->onlyReadsMemory() || !F->hasNoSync()) { + indicatePessimisticFixpoint(); + return; + } + } + AAInvariantLoadPointerImpl::initialize(A); + } + +protected: + virtual 
bool requiresNoAlias() const override { return true; } }; struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl { AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A) : AAInvariantLoadPointerImpl(IRP, A) {} + + void initialize(Attributor &) override { + const auto *F = getAssociatedFunction(); + assert(F && "no associated function to argument"); + + if (isCallableCC(F->getCallingConv()) && !F->hasLocalLinkage()) + removeAssumedBits(IS_LOCALLY_CONSTRAINED); + } + +protected: + virtual bool requiresNoAlias() const override { + const auto *F = getAssociatedFunction(); + assert(F && "no associated function to argument"); + return !isCallableCC(F->getCallingConv()); + } }; struct AAInvariantLoadPointerCallSiteArgument final diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll index b73e6ffafbe4a..4cbf3f8edc8c6 100644 --- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll +++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll @@ -4,13 +4,16 @@ @G = addrspace(1) global i32 zeroinitializer, align 4 declare void @clobber(i32) #0 declare ptr addrspace(1) @get_ptr() #0 +declare noalias ptr addrspace(1) @get_noalias_ptr() #0 +declare noalias ptr addrspace(1) @get_untouched_ptr() #1 attributes #0 = { nofree norecurse nosync nounwind willreturn } +attributes #1 = { nofree norecurse nosync nounwind willreturn readonly } define void @test_nonkernel(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define void @test_nonkernel( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4:[0-9]+]] +; 
AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5:[0-9]+]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) %ptr, align 4 @@ -21,9 +24,9 @@ define void @test_nonkernel(ptr addrspace(1) noalias %ptr) { define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_plain( -; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] { +; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) %ptr, align 4 @@ -34,9 +37,9 @@ define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) { define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) %ptr, align 4 @@ -46,10 +49,10 @@ define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) { define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_swap( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) 
dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR1]] { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 ; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) %ptr, align 4 @@ -61,9 +64,9 @@ define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %s define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %val = load volatile i32, ptr addrspace(1) %ptr, align 4 @@ -74,9 +77,9 @@ define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) { define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 
[[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4 @@ -86,9 +89,9 @@ define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) { define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4 @@ -99,9 +102,9 @@ define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) { define amdgpu_kernel void @test_global() { ; AMDGCN-LABEL: define amdgpu_kernel void @test_global( -; AMDGCN-SAME: ) #[[ATTR1]] { +; AMDGCN-SAME: ) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) @G, align 4 @@ -112,7 +115,7 @@ define amdgpu_kernel void @test_global() { define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) { ; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 
4, !invariant.load [[META0]] ; AMDGCN-NEXT: ret i32 [[VAL]] ; @@ -123,9 +126,9 @@ define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) { define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] { -; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5:[0-9]+]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6:[0-9]+]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) @@ -133,9 +136,9 @@ define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias % ret void } -define internal i32 @test_internal_load(ptr addrspace(1) %ptr) { +define internal i32 @test_internal_load(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define internal i32 @test_internal_load( -; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 ; AMDGCN-NEXT: ret i32 [[VAL]] ; @@ -146,9 +149,9 @@ define internal i32 @test_internal_load(ptr addrspace(1) %ptr) { define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal( -; AMDGCN-SAME: ptr addrspace(1) nofree 
readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] { -; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %val = call i32 @test_internal_load(ptr addrspace(1) %ptr) @@ -158,7 +161,7 @@ define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) { define internal i32 @test_internal_written(ptr addrspace(1) %ptr) { ; AMDGCN-LABEL: define internal i32 @test_internal_written( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 ; AMDGCN-NEXT: ret i32 [[VAL]] ; @@ -169,24 +172,24 @@ define internal i32 @test_internal_written(ptr addrspace(1) %ptr) { define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR1]] { +; AMDGCN-SAME: ptr addrspace(1) noalias nofree captures(none) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6]] ; AMDGCN-NEXT: store i32 [[X]], ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: 
[[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR]]) #[[ATTR5]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; - store i32 %x, ptr addrspace(1) %ptr %val = call i32 @test_internal_written(ptr addrspace(1) %ptr) + store i32 %x, ptr addrspace(1) %ptr call void @clobber(i32 %val) ret void } define amdgpu_kernel void @test_call_ptr() { ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr( -; AMDGCN-SAME: ) #[[ATTR1]] { -; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR4]] +; AMDGCN-SAME: ) #[[ATTR2]] { +; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR5]] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %ptr = call ptr addrspace(1) @get_ptr() @@ -196,12 +199,41 @@ define amdgpu_kernel void @test_call_ptr() { ret void } +define amdgpu_kernel void @test_call_noalias_ptr() { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr( +; AMDGCN-SAME: ) #[[ATTR2]] { +; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_noalias_ptr() #[[ATTR5]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: ret void +; + %ptr = call ptr addrspace(1) @get_noalias_ptr() + %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; may not be an !invariant.load since %ptr may have been written to before returning + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_call_untouched_ptr() { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_untouched_ptr( +; AMDGCN-SAME: ) #[[ATTR2]] { +; AMDGCN-NEXT: [[PTR:%.*]] = call noalias align 4 ptr 
addrspace(1) @get_untouched_ptr() #[[ATTR7:[0-9]+]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: ret void +; + %ptr = call ptr addrspace(1) @get_untouched_ptr() + %val = load i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void +} + define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load( -; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] { +; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false @@ -213,10 +245,10 @@ define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) n define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias( -; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] { +; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr 
addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false @@ -228,18 +260,18 @@ define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, pt define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load( -; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] { +; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[ENTRY:.*:]] ; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] ; AMDGCN: [[TRUE]]: -; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR5]] ; AMDGCN-NEXT: br label %[[FINISH:.*]] ; AMDGCN: [[FALSE]]: ; AMDGCN-NEXT: br label %[[FINISH]] ; AMDGCN: [[FINISH]]: ; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; entry: @@ -259,18 +291,18 @@ 
finish: define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias( -; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] { +; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[ENTRY:.*:]] ; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] ; AMDGCN: [[TRUE]]: -; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR5]] ; AMDGCN-NEXT: br label %[[FINISH:.*]] ; AMDGCN: [[FALSE]]: ; AMDGCN-NEXT: br label %[[FINISH]] ; AMDGCN: [[FINISH]]: ; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] ; AMDGCN-NEXT: ret void ; entry: From 2c9f8a256b359299f69dc4f036e735a1c711f958 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Tue, 3 Jun 2025 09:12:23 -0500 Subject: [PATCH 5/9] Modified checks for unrelated but affected tests It seems the attributor cleans up more dead instructions. 
--- .../Attributor/dereferenceable-1.ll | 1 - .../Attributor/value-simplify-local-remote.ll | 22 +++++++------------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll index 07e2d5ea15752..5bff2a2e6b208 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -207,7 +207,6 @@ define void @f7_1(ptr %ptr, i1 %cnd) { ; CHECK-LABEL: define {{[^@]+}}@f7_1 ; CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[PTR:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[A:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]] -; CHECK-NEXT: [[PTR_0:%.*]] = load i32, ptr [[PTR]], align 4 ; CHECK-NEXT: [[B:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[CND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK: if.true: diff --git a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll index 374d5ba7ff52b..4767244800d21 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll @@ -135,7 +135,7 @@ define internal %S @foo.1(ptr %foo.this) { ; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; TUNIT-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8 ; TUNIT-NEXT: call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR5:[0-9]+]] -; TUNIT-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 +; TUNIT-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]] ; TUNIT-NEXT: ret [[S]] [[FOO_RET]] ; ; CGSCC: Function Attrs: 
mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) @@ -145,7 +145,7 @@ define internal %S @foo.1(ptr %foo.this) { ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8 ; CGSCC-NEXT: call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR6]] -; CGSCC-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 +; CGSCC-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]] ; CGSCC-NEXT: ret [[S]] [[FOO_RET]] ; entry: @@ -234,7 +234,7 @@ define internal %S @bar.5(ptr %this) { ; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; TUNIT-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 ; TUNIT-NEXT: call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR4]] -; TUNIT-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 +; TUNIT-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]] ; TUNIT-NEXT: ret [[S]] [[BAR_RET]] ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) @@ -244,7 +244,7 @@ define internal %S @bar.5(ptr %this) { ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 ; CGSCC-NEXT: call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR9:[0-9]+]] -; CGSCC-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 +; CGSCC-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]] ; CGSCC-NEXT: ret [[S]] [[BAR_RET]] ; entry: @@ -286,7 +286,7 @@ define 
internal void @boom(ptr %this, ptr %data) { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; TUNIT-NEXT: store ptr [[DATA]], ptr [[DATA_ADDR]], align 8 -; TUNIT-NEXT: [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8 +; TUNIT-NEXT: [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8, !invariant.load [[META8]] ; TUNIT-NEXT: store ptr [[V]], ptr [[THIS]], align 8 ; TUNIT-NEXT: ret void ; @@ -342,14 +342,6 @@ define %S.2 @t3.helper() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RETVAL:%.*]] = alloca [[S_2:%.*]], align 8 ; CHECK-NEXT: call void @ext1(ptr noundef nonnull align 8 dereferenceable(24) [[RETVAL]]) -; CHECK-NEXT: [[DOTFCA_0_LOAD:%.*]] = load ptr, ptr [[RETVAL]], align 8 -; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[S_2]] poison, ptr [[DOTFCA_0_LOAD]], 0 -; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 1 -; CHECK-NEXT: [[DOTFCA_1_LOAD:%.*]] = load i64, ptr [[DOTFCA_1_GEP]], align 8 -; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_0_INSERT]], i64 [[DOTFCA_1_LOAD]], 1 -; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 2 -; CHECK-NEXT: [[DOTFCA_2_LOAD:%.*]] = load i64, ptr [[DOTFCA_2_GEP]], align 8 -; CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_1_INSERT]], i64 [[DOTFCA_2_LOAD]], 2 ; CHECK-NEXT: ret [[S_2]] zeroinitializer ; entry: @@ -508,7 +500,7 @@ define internal %S @t4a(ptr %this) { ; CGSCC-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 ; CGSCC-NEXT: call void @t4b(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]] -; CGSCC-NEXT: [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8 +; CGSCC-NEXT: [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]] ; CGSCC-NEXT: ret [[S]] 
[[TMP0]] ; entry: @@ -623,6 +615,7 @@ entry: ; TUNIT: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; TUNIT: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2} ; TUNIT: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} +; TUNIT: [[META8]] = !{} ;. ; CGSCC: [[META0:![0-9]+]] = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 5]} ; CGSCC: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} @@ -632,4 +625,5 @@ entry: ; CGSCC: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; CGSCC: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2} ; CGSCC: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} +; CGSCC: [[META8]] = !{} ;. From fcbc5a27d02fe5f4ab671abaf0eada1194991815 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Tue, 3 Jun 2025 11:01:30 -0500 Subject: [PATCH 6/9] Incorporated feedback. --- .../Transforms/IPO/AttributorAttributes.cpp | 112 ++++++++++-------- 1 file changed, 64 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index cfe7611276feb..66436262bf1f7 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12569,6 +12569,7 @@ struct AAInvariantLoadPointerImpl bool isKnownInvariant() const final { return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED); } + bool isKnownLocallyInvariant() const final { if (isKnown(IS_LOCALLY_INVARIANT)) return true; @@ -12578,6 +12579,7 @@ struct AAInvariantLoadPointerImpl bool isAssumedInvariant() const final { return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED); } + bool isAssumedLocallyInvariant() const final { if (isAssumed(IS_LOCALLY_INVARIANT)) return true; @@ -12585,44 +12587,15 @@ struct AAInvariantLoadPointerImpl } ChangeStatus updateImpl(Attributor &A) override { - if (isKnownInvariant()) - return indicateOptimisticFixpoint(); - ChangeStatus Changed = ChangeStatus::UNCHANGED; - Changed |= 
checkNoAlias(A); + Changed |= updateNoAlias(A); if (requiresNoAlias() && !isAssumed(IS_NOALIAS)) return indicatePessimisticFixpoint(); - Changed |= checkNoEffect(A); - - // try to infer invariance from underlying objects - const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>( - getIRPosition(), this, DepClassTy::REQUIRED); - - bool UsedAssumedInformation = false; - const auto IsInvariantLoadIfPointer = [&](const Value &V) { - if (!V.getType()->isPointerTy()) - return true; - const auto *IsInvariantLoadPointer = - A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this, - DepClassTy::REQUIRED); - if (IsInvariantLoadPointer->isKnownLocallyInvariant()) - return true; - if (!IsInvariantLoadPointer->isAssumedLocallyInvariant()) - return false; - - UsedAssumedInformation = true; - return true; - }; - if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer)) - return indicatePessimisticFixpoint(); + Changed |= updateNoEffect(A); - if (!UsedAssumedInformation) { - // pointer is known (not assumed) to be locally invariant - addKnownBits(IS_LOCALLY_INVARIANT); - return Changed; - } + Changed |= updateLocalInvariance(A); return Changed; } @@ -12632,7 +12605,7 @@ struct AAInvariantLoadPointerImpl return ChangeStatus::UNCHANGED; ChangeStatus Changed = ChangeStatus::UNCHANGED; - Value *Ptr = &getAssociatedValue(); + const Value *Ptr = &getAssociatedValue(); const auto TagInvariantLoads = [&](const Use &U, bool &) { if (U.get() != Ptr) return true; @@ -12649,7 +12622,6 @@ struct AAInvariantLoadPointerImpl return true; if (auto *LI = dyn_cast<LoadInst>(I)) { - LI->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(LI->getContext(), {})); Changed = ChangeStatus::CHANGED; @@ -12677,14 +12649,14 @@ struct AAInvariantLoadPointerImpl private: bool isExternal() const { - const auto *F = getAssociatedFunction(); + const Function *F = getAssociatedFunction(); if (!F) return true; return isCallableCC(F->getCallingConv()) && getPositionKind() != IRP_CALL_SITE_RETURNED; } - ChangeStatus checkNoAlias(Attributor &A) { +
ChangeStatus updateNoAlias(Attributor &A) { if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS)) return ChangeStatus::UNCHANGED; @@ -12693,7 +12665,7 @@ struct AAInvariantLoadPointerImpl getIRPosition(), this, DepClassTy::REQUIRED)) { if (ANoAlias->isKnownNoAlias()) { addKnownBits(IS_NOALIAS); - return ChangeStatus::UNCHANGED; + return ChangeStatus::CHANGED; } if (!ANoAlias->isAssumedNoAlias()) { @@ -12706,7 +12678,7 @@ struct AAInvariantLoadPointerImpl // try to infer noalias from argument attribute, since it is applicable for // the duration of the function - if (const auto *Arg = getAssociatedArgument()) { + if (const Argument *Arg = getAssociatedArgument()) { if (Arg->hasNoAliasAttr()) { addKnownBits(IS_NOALIAS); return ChangeStatus::UNCHANGED; @@ -12721,7 +12693,7 @@ struct AAInvariantLoadPointerImpl return ChangeStatus::UNCHANGED; } - ChangeStatus checkNoEffect(Attributor &A) { + ChangeStatus updateNoEffect(Attributor &A) { if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT)) return ChangeStatus::UNCHANGED; @@ -12729,10 +12701,8 @@ struct AAInvariantLoadPointerImpl return indicatePessimisticFixpoint(); const auto HasNoEffectLoads = [&](const Use &U, bool &) { - if (const auto *LI = dyn_cast<LoadInst>(U.getUser())) - return !LI->mayHaveSideEffects(); - - return true; + const auto *LI = dyn_cast<LoadInst>(U.getUser()); + return !LI || !LI->mayHaveSideEffects(); }; if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue())) return indicatePessimisticFixpoint(); @@ -12751,7 +12721,7 @@ struct AAInvariantLoadPointerImpl return ChangeStatus::UNCHANGED; } - if (const auto *Arg = getAssociatedArgument()) { + if (const Argument *Arg = getAssociatedArgument()) { if (Arg->onlyReadsMemory()) { addKnownBits(IS_NOEFFECT); return ChangeStatus::UNCHANGED; @@ -12764,6 +12734,47 @@ struct AAInvariantLoadPointerImpl return ChangeStatus::UNCHANGED; } + + ChangeStatus updateLocalInvariance(Attributor &A) { + if (isKnown(IS_LOCALLY_INVARIANT) || !isAssumed(IS_LOCALLY_INVARIANT)) + return
ChangeStatus::UNCHANGED; + + // try to infer invariance from underlying objects + const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>( + getIRPosition(), this, DepClassTy::REQUIRED); + if (!AUO) + return ChangeStatus::UNCHANGED; + + bool UsedAssumedInformation = false; + const auto IsLocallyInvariantLoadIfPointer = [&](const Value &V) { + if (!V.getType()->isPointerTy()) + return true; + const auto *IsInvariantLoadPointer = + A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this, + DepClassTy::REQUIRED); + // conservatively fail if invariance cannot be inferred + if (!IsInvariantLoadPointer) + return false; + + if (IsInvariantLoadPointer->isKnownLocallyInvariant()) + return true; + if (!IsInvariantLoadPointer->isAssumedLocallyInvariant()) + return false; + + UsedAssumedInformation = true; + return true; + }; + if (!AUO->forallUnderlyingObjects(IsLocallyInvariantLoadIfPointer)) + return indicatePessimisticFixpoint(); + + if (!UsedAssumedInformation) { + // pointer is known (not assumed) to be locally invariant + addKnownBits(IS_LOCALLY_INVARIANT); + return ChangeStatus::CHANGED; + } + + return ChangeStatus::UNCHANGED; + } }; struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl { @@ -12786,7 +12797,7 @@ struct AAInvariantLoadPointerCallSiteReturned final : AAInvariantLoadPointerImpl(IRP, A) {} void initialize(Attributor &A) override { - const auto *F = getAssociatedFunction(); + const Function *F = getAssociatedFunction(); assert(F && "no associated function for return from call"); // not much we can say about opaque functions @@ -12808,16 +12819,21 @@ struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl { : AAInvariantLoadPointerImpl(IRP, A) {} void initialize(Attributor &) override { - const auto *F = getAssociatedFunction(); + const Function *F = getAssociatedFunction(); assert(F && "no associated function to argument"); - if (isCallableCC(F->getCallingConv()) && !F->hasLocalLinkage()) + if (!isCallableCC(F->getCallingConv())) { +
addKnownBits(IS_LOCALLY_CONSTRAINED); + return; + } + + if (!F->hasLocalLinkage()) removeAssumedBits(IS_LOCALLY_CONSTRAINED); } protected: virtual bool requiresNoAlias() const override { - const auto *F = getAssociatedFunction(); + const Function *F = getAssociatedFunction(); assert(F && "no associated function to argument"); return !isCallableCC(F->getCallingConv()); } From 692876e8aa07ef453c6f94d268f2df59f2b5c5b7 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Tue, 3 Jun 2025 16:51:49 -0500 Subject: [PATCH 7/9] Incorporated feedback. --- .../Transforms/IPO/AttributorAttributes.cpp | 33 ++++++++++--------- .../{ => AMDGPU}/tag-invariant-loads.ll | 5 +-- 2 files changed, 20 insertions(+), 18 deletions(-) rename llvm/test/Transforms/Attributor/{ => AMDGPU}/tag-invariant-loads.ll (99%) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 66436262bf1f7..0c267bfa455fa 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12643,11 +12643,22 @@ struct AAInvariantLoadPointerImpl /// See AbstractAttribute::trackStatistics(). void trackStatistics() const override {} -protected: - /// Indicate that invariance necessarily requires the pointer to be noalias. - virtual bool requiresNoAlias() const { return false; } - private: + /// Indicate that noalias is required for the pointer to be invariant. 
+ bool requiresNoAlias() const { + switch (getPositionKind()) { + default: + return false; + case IRP_CALL_SITE_RETURNED: + return true; + case IRP_ARGUMENT: { + const Function *F = getAssociatedFunction(); + assert(F && "no associated function for argument"); + return !isCallableCC(F->getCallingConv()); + } + } + } + bool isExternal() const { const Function *F = getAssociatedFunction(); if (!F) @@ -12800,7 +12811,7 @@ struct AAInvariantLoadPointerCallSiteReturned final const Function *F = getAssociatedFunction(); assert(F && "no associated function for return from call"); - // not much we can say about opaque functions + // There is not much we can say about opaque functions. if (F->isDeclaration() || F->isIntrinsic()) { if (!F->onlyReadsMemory() || !F->hasNoSync()) { indicatePessimisticFixpoint(); @@ -12809,9 +12820,6 @@ struct AAInvariantLoadPointerCallSiteReturned final } AAInvariantLoadPointerImpl::initialize(A); } - -protected: - virtual bool requiresNoAlias() const override { return true; } }; struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl { @@ -12820,7 +12828,7 @@ struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl { void initialize(Attributor &) override { const Function *F = getAssociatedFunction(); - assert(F && "no associated function to argument"); + assert(F && "no associated function for argument"); if (!isCallableCC(F->getCallingConv())) { addKnownBits(IS_LOCALLY_CONSTRAINED); @@ -12830,13 +12838,6 @@ struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl { if (!F->hasLocalLinkage()) removeAssumedBits(IS_LOCALLY_CONSTRAINED); } - -protected: - virtual bool requiresNoAlias() const override { - const Function *F = getAssociatedFunction(); - assert(F && "no associated function to argument"); - return !isCallableCC(F->getCallingConv()); - } }; struct AAInvariantLoadPointerCallSiteArgument final diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll 
b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll similarity index 99% rename from llvm/test/Transforms/Attributor/tag-invariant-loads.ll rename to llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll index 4cbf3f8edc8c6..3cf6759a28b53 100644 --- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll +++ b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll @@ -6,8 +6,6 @@ declare void @clobber(i32) #0 declare ptr addrspace(1) @get_ptr() #0 declare noalias ptr addrspace(1) @get_noalias_ptr() #0 declare noalias ptr addrspace(1) @get_untouched_ptr() #1 -attributes #0 = { nofree norecurse nosync nounwind willreturn } -attributes #1 = { nofree norecurse nosync nounwind willreturn readonly } define void @test_nonkernel(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define void @test_nonkernel( @@ -319,6 +317,9 @@ finish: call void @clobber(i32 %val) ret void } + +attributes #0 = { nofree norecurse nosync nounwind willreturn } +attributes #1 = { nofree norecurse nosync nounwind willreturn readonly } ;. ; AMDGCN: [[META0]] = !{} ;. From ee594eaa00fdceb9345ddcec2ab2c5e1844a56b2 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Thu, 5 Jun 2025 14:54:48 -0500 Subject: [PATCH 8/9] Made default `noalias` requirement more conservative. --- .../Transforms/IPO/AttributorAttributes.cpp | 7 +++-- .../Attributor/AMDGPU/tag-invariant-loads.ll | 28 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 0c267bfa455fa..b8516985aa4e3 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12648,9 +12648,12 @@ struct AAInvariantLoadPointerImpl bool requiresNoAlias() const { switch (getPositionKind()) { default: - return false; - case IRP_CALL_SITE_RETURNED: + // Conservatively default to require noalias. 
return true; + case IRP_FLOAT: + case IRP_RETURNED: + case IRP_CALL_SITE: + return false; case IRP_ARGUMENT: { const Function *F = getAssociatedFunction(); assert(F && "no associated function for argument"); diff --git a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll index 3cf6759a28b53..699eedba02280 100644 --- a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll +++ b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll @@ -45,6 +45,34 @@ define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) { ret void } +define amdgpu_kernel void @test_gep(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_gep( +; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4 +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: ret void +; + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %val = load i32, ptr addrspace(1) %gep, align 4 + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_noalias_gep(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_gep( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4 +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: ret void +; + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %val = load i32, ptr addrspace(1) %gep, align 4 + call void @clobber(i32 %val) + ret void +} + define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) { ; 
AMDGCN-LABEL: define amdgpu_kernel void @test_swap( ; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] { From 041d966c3e7b504af7694d249a25565c6eb5c3f4 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Thu, 12 Jun 2025 16:01:26 -0500 Subject: [PATCH 9/9] Added support for certain intrinsics. --- .../Transforms/IPO/AttributorAttributes.cpp | 55 ++++++++---- .../Attributor/AMDGPU/tag-invariant-loads.ll | 85 +++++++++++++------ 2 files changed, 95 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index b8516985aa4e3..a933cd4fd886b 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12654,6 +12654,11 @@ struct AAInvariantLoadPointerImpl case IRP_RETURNED: case IRP_CALL_SITE: return false; + case IRP_CALL_SITE_RETURNED: { + const auto &CB = cast<CallBase>(getAnchorValue()); + return !isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( + &CB, /*MustPreserveNullness=*/false); + } case IRP_ARGUMENT: { const Function *F = getAssociatedFunction(); assert(F && "no associated function for argument"); @@ -12674,7 +12679,7 @@ struct AAInvariantLoadPointerImpl if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS)) return ChangeStatus::UNCHANGED; - // try to use AANoAlias + // Try to use AANoAlias. if (const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>( getIRPosition(), this, DepClassTy::REQUIRED)) { if (ANoAlias->isKnownNoAlias()) { @@ -12690,15 +12695,15 @@ struct AAInvariantLoadPointerImpl return ChangeStatus::UNCHANGED; } - // try to infer noalias from argument attribute, since it is applicable for - // the duration of the function + // Try to infer noalias from argument attribute, since it is applicable for + // the duration of the function.
if (const Argument *Arg = getAssociatedArgument()) { if (Arg->hasNoAliasAttr()) { addKnownBits(IS_NOALIAS); return ChangeStatus::UNCHANGED; } - // noalias information is not provided, and cannot be inferred, + // Noalias information is not provided, and cannot be inferred, // so we conservatively assume the pointer aliases. removeAssumedBits(IS_NOALIAS); return ChangeStatus::CHANGED; @@ -12721,7 +12726,7 @@ struct AAInvariantLoadPointerImpl if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue())) return indicatePessimisticFixpoint(); - // try to use AAMemoryBehavior to infer readonly attribute + // Try to use AAMemoryBehavior to infer readonly attribute. if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>( getIRPosition(), this, DepClassTy::REQUIRED)) { if (!AMemoryBehavior->isAssumedReadOnly()) @@ -12741,8 +12746,8 @@ struct AAInvariantLoadPointerImpl return ChangeStatus::UNCHANGED; } - // readonly information is not provided, and cannot be inferred from - // AAMemoryBehavior + // Readonly information is not provided, and cannot be inferred from + // AAMemoryBehavior. return indicatePessimisticFixpoint(); } @@ -12766,7 +12771,7 @@ struct AAInvariantLoadPointerImpl const auto *IsInvariantLoadPointer = A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this, DepClassTy::REQUIRED); - // conservatively fail if invariance cannot be inferred + // Conservatively fail if invariance cannot be inferred.
if (!IsInvariantLoadPointer) return false; @@ -12781,8 +12786,18 @@ struct AAInvariantLoadPointerImpl if (!AUO->forallUnderlyingObjects(IsLocallyInvariantLoadIfPointer)) return indicatePessimisticFixpoint(); + if (const auto *CB = dyn_cast(&getAnchorValue())) { + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( + CB, /*MustPreserveNullness=*/false)) { + for (const Value *Arg : CB->args()) { + if (!IsLocallyInvariantLoadIfPointer(*Arg)) + return indicatePessimisticFixpoint(); + } + } + } + if (!UsedAssumedInformation) { - // pointer is known (not assumed) to be locally invariant + // Pointer is known and not just assumed to be locally invariant. addKnownBits(IS_LOCALLY_INVARIANT); return ChangeStatus::CHANGED; } @@ -12814,14 +12829,20 @@ struct AAInvariantLoadPointerCallSiteReturned final const Function *F = getAssociatedFunction(); assert(F && "no associated function for return from call"); - // There is not much we can say about opaque functions. - if (F->isDeclaration() || F->isIntrinsic()) { - if (!F->onlyReadsMemory() || !F->hasNoSync()) { - indicatePessimisticFixpoint(); - return; - } - } - AAInvariantLoadPointerImpl::initialize(A); + if (!F->isDeclaration() && !F->isIntrinsic()) + return AAInvariantLoadPointerImpl::initialize(A); + + const auto &CB = cast(getAnchorValue()); + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( + &CB, /*MustPreserveNullness=*/false)) + return AAInvariantLoadPointerImpl::initialize(A); + + if (F->onlyReadsMemory() && F->hasNoSync()) + return AAInvariantLoadPointerImpl::initialize(A); + + // At this point, the function is opaque, so we conservatively assume + // non-invariance. 
+ indicatePessimisticFixpoint(); } }; diff --git a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll index 699eedba02280..ace68a19bf41f 100644 --- a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll +++ b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll @@ -11,7 +11,7 @@ define void @test_nonkernel(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define void @test_nonkernel( ; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5:[0-9]+]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6:[0-9]+]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) %ptr, align 4 @@ -24,7 +24,7 @@ define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_plain( ; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) %ptr, align 4 @@ -37,7 +37,7 @@ define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr( ; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %val = load 
i32, ptr addrspace(1) %ptr, align 4 @@ -50,7 +50,7 @@ define amdgpu_kernel void @test_gep(ptr addrspace(1) %ptr) { ; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4 ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 @@ -64,7 +64,7 @@ define amdgpu_kernel void @test_noalias_gep(ptr addrspace(1) noalias %ptr) { ; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4 ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 @@ -78,7 +78,7 @@ define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %s ; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 ; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) %ptr, align 4 @@ -92,7 +92,7 @@ define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile( ; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { ; AMDGCN-NEXT: 
[[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %val = load volatile i32, ptr addrspace(1) %ptr, align 4 @@ -105,7 +105,7 @@ define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered( ; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4 @@ -117,7 +117,7 @@ define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic( ; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4 @@ -130,7 +130,7 @@ define amdgpu_kernel void @test_global() { ; AMDGCN-LABEL: define amdgpu_kernel void @test_global( ; AMDGCN-SAME: ) #[[ATTR2]] { ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %val = load i32, ptr addrspace(1) @G, align 4 @@ -153,8 +153,8 @@ define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) { define 
amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias( ; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6:[0-9]+]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7:[0-9]+]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) @@ -176,8 +176,8 @@ define internal i32 @test_internal_load(ptr addrspace(1) noalias %ptr) { define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal( ; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %val = call i32 @test_internal_load(ptr addrspace(1) %ptr) @@ -199,9 +199,9 @@ define internal i32 @test_internal_written(ptr addrspace(1) %ptr) { define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written( ; AMDGCN-SAME: ptr addrspace(1) noalias nofree captures(none) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = call 
i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6]] +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7]] ; AMDGCN-NEXT: store i32 [[X]], ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %val = call i32 @test_internal_written(ptr addrspace(1) %ptr) @@ -213,9 +213,9 @@ define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias % define amdgpu_kernel void @test_call_ptr() { ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr( ; AMDGCN-SAME: ) #[[ATTR2]] { -; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR5]] +; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR6]] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %ptr = call ptr addrspace(1) @get_ptr() @@ -228,9 +228,9 @@ define amdgpu_kernel void @test_call_ptr() { define amdgpu_kernel void @test_call_noalias_ptr() { ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr( ; AMDGCN-SAME: ) #[[ATTR2]] { -; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_noalias_ptr() #[[ATTR5]] +; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_noalias_ptr() #[[ATTR6]] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %ptr = call ptr addrspace(1) @get_noalias_ptr() @@ -243,9 +243,9 @@ define amdgpu_kernel void @test_call_noalias_ptr() { define amdgpu_kernel void @test_call_untouched_ptr() { ; 
AMDGCN-LABEL: define amdgpu_kernel void @test_call_untouched_ptr( ; AMDGCN-SAME: ) #[[ATTR2]] { -; AMDGCN-NEXT: [[PTR:%.*]] = call noalias align 4 ptr addrspace(1) @get_untouched_ptr() #[[ATTR7:[0-9]+]] +; AMDGCN-NEXT: [[PTR:%.*]] = call noalias align 4 ptr addrspace(1) @get_untouched_ptr() #[[ATTR8:[0-9]+]] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %ptr = call ptr addrspace(1) @get_untouched_ptr() @@ -254,12 +254,41 @@ define amdgpu_kernel void @test_call_untouched_ptr() { ret void } +define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer( +; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR9:[0-9]+]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0) + %val = load i32, ptr addrspace(7) %rsrc, align 4 + ;; original %ptr may alias + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR9]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) 
[[RSRC]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0) + %val = load i32, ptr addrspace(7) %rsrc, align 4 + call void @clobber(i32 %val) + ret void +} + define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) { ; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load( ; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false @@ -274,7 +303,7 @@ define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, pt ; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { ; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false @@ -290,14 +319,14 @@ define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias % ; AMDGCN-NEXT: 
[[ENTRY:.*:]] ; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] ; AMDGCN: [[TRUE]]: -; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR6]] ; AMDGCN-NEXT: br label %[[FINISH:.*]] ; AMDGCN: [[FALSE]]: ; AMDGCN-NEXT: br label %[[FINISH]] ; AMDGCN: [[FINISH]]: ; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; entry: @@ -321,14 +350,14 @@ define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrsp ; AMDGCN-NEXT: [[ENTRY:.*:]] ; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] ; AMDGCN: [[TRUE]]: -; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR6]] ; AMDGCN-NEXT: br label %[[FINISH:.*]] ; AMDGCN: [[FALSE]]: ; AMDGCN-NEXT: br label %[[FINISH]] ; AMDGCN: [[FINISH]]: ; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] ; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] ; AMDGCN-NEXT: ret void ; entry: