diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index c628bbb007230..55be0838d464a 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6289,6 +6289,47 @@ struct AAUnderlyingObjects : AbstractAttribute {
                        AA::ValueScope Scope = AA::Interprocedural) const = 0;
 };
 
+/// An abstract interface for identifying pointers from which loads can be
+/// marked invariant.
+struct AAInvariantLoadPointer : public AbstractAttribute {
+  AAInvariantLoadPointer(const IRPosition &IRP) : AbstractAttribute(IRP) {}
+
+  /// See AbstractAttribute::isValidIRPositionForInit
+  static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+    if (!IRP.getAssociatedType()->isPointerTy())
+      return false;
+
+    return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+  }
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAInvariantLoadPointer &createForPosition(const IRPosition &IRP,
+                                                   Attributor &A);
+
+  /// Return true if the pointer's contents are known to remain invariant.
+  virtual bool isKnownInvariant() const = 0;
+  virtual bool isKnownLocallyInvariant() const = 0;
+
+  /// Return true if the pointer's contents are assumed to remain invariant.
+  virtual bool isAssumedInvariant() const = 0;
+  virtual bool isAssumedLocallyInvariant() const = 0;
+
+  /// See AbstractAttribute::getName().
+  StringRef getName() const override { return "AAInvariantLoadPointer"; }
+
+  /// See AbstractAttribute::getIdAddr().
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAInvariantLoadPointer
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address).
+  static const char ID;
+};
+
 /// An abstract interface for address space information.
 struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
   AAAddressSpace(const IRPosition &IRP, Attributor &A)
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index cbdbf9ae1494d..1dc576656d12a 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3620,6 +3620,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
       if (SimplifyAllLoads)
         getAssumedSimplified(IRPosition::value(I), nullptr,
                              UsedAssumedInformation, AA::Intraprocedural);
+      getOrCreateAAFor<AAInvariantLoadPointer>(
+          IRPosition::value(*LI->getPointerOperand()));
       getOrCreateAAFor<AAAddressSpace>(
           IRPosition::value(*LI->getPointerOperand()));
     } else {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 470c5308edca4..a933cd4fd886b 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -191,6 +191,7 @@ PIPE_OPERATOR(AAInterFnReachability)
 PIPE_OPERATOR(AAPointerInfo)
 PIPE_OPERATOR(AAAssumptionInfo)
 PIPE_OPERATOR(AAUnderlyingObjects)
+PIPE_OPERATOR(AAInvariantLoadPointer)
 PIPE_OPERATOR(AAAddressSpace)
 PIPE_OPERATOR(AAAllocationInfo)
 PIPE_OPERATOR(AAIndirectCallInfo)
@@ -12534,6 +12535,342 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
 };
 } // namespace
 
+/// --------------------- Invariant Load Pointer -------------------------------
+namespace {
+
+struct AAInvariantLoadPointerImpl
+    : public StateWrapper<BitIntegerState<uint8_t, 15>,
+                          AAInvariantLoadPointer> {
+
+  enum {
+    // pointer does not alias within the bounds of the function
+    IS_NOALIAS = 1 << 0,
+    // pointer is not involved in any effectful instructions within the bounds
+    // of the function
+    IS_NOEFFECT = 1 << 1,
+    // loads are invariant within the bounds of the function
+    IS_LOCALLY_INVARIANT = 1 << 2,
+    // memory lifetime is constrained within the bounds of the function
+    IS_LOCALLY_CONSTRAINED = 1 << 3,
+
+    IS_BEST_STATE = IS_NOALIAS | IS_NOEFFECT | IS_LOCALLY_INVARIANT |
+                    IS_LOCALLY_CONSTRAINED,
+  };
+  static_assert(getBestState() == IS_BEST_STATE, "Unexpected best state");
+
+  using Base =
+      StateWrapper<BitIntegerState<uint8_t, 15>, AAInvariantLoadPointer>;
+
+  // the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but
+  // pessimistic about IS_KNOWN_INVARIANT
+  AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A)
+      : Base(IRP) {}
+
+  bool isKnownInvariant() const final {
+    return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED);
+  }
+
+  bool isKnownLocallyInvariant() const final {
+    if (isKnown(IS_LOCALLY_INVARIANT))
+      return true;
+    return isKnown(IS_NOALIAS | IS_NOEFFECT);
+  }
+
+  bool isAssumedInvariant() const final {
+    return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED);
+  }
+
+  bool isAssumedLocallyInvariant() const final {
+    if (isAssumed(IS_LOCALLY_INVARIANT))
+      return true;
+    return isAssumed(IS_NOALIAS | IS_NOEFFECT);
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+    Changed |= updateNoAlias(A);
+    if (requiresNoAlias() && !isAssumed(IS_NOALIAS))
+      return indicatePessimisticFixpoint();
+
+    Changed |= updateNoEffect(A);
+
+    Changed |= updateLocalInvariance(A);
+
+    return Changed;
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    if (!isKnownInvariant())
+      return ChangeStatus::UNCHANGED;
+
+    ChangeStatus Changed = ChangeStatus::UNCHANGED;
+    const Value *Ptr = &getAssociatedValue();
+    const auto TagInvariantLoads = [&](const Use &U, bool &) {
+      if (U.get() != Ptr)
+        return true;
+      auto *I = dyn_cast<Instruction>(U.getUser());
+      if (!I)
+        return true;
+
+      // Ensure that we are only changing uses from the corresponding callgraph
+      // SSC in the case that the AA isn't run on the entire module
+      if (!A.isRunOn(I->getFunction()))
+        return true;
+
+      if (I->hasMetadata(LLVMContext::MD_invariant_load))
+        return true;
+
+      if (auto *LI = dyn_cast<LoadInst>(I)) {
+        LI->setMetadata(LLVMContext::MD_invariant_load,
+                        MDNode::get(LI->getContext(), {}));
+        Changed = ChangeStatus::CHANGED;
+      }
+      return true;
+    };
+
+    (void)A.checkForAllUses(TagInvariantLoads, *this, *Ptr);
+    return Changed;
+  }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr(Attributor *) const override {
+    if (isKnownInvariant())
+      return "load-invariant pointer";
+    return "non-invariant pointer";
+  }
+
+  /// See AbstractAttribute::trackStatistics().
+  void trackStatistics() const override {}
+
+private:
+  /// Indicate that noalias is required for the pointer to be invariant.
+  bool requiresNoAlias() const {
+    switch (getPositionKind()) {
+    default:
+      // Conservatively default to require noalias.
+      return true;
+    case IRP_FLOAT:
+    case IRP_RETURNED:
+    case IRP_CALL_SITE:
+      return false;
+    case IRP_CALL_SITE_RETURNED: {
+      const auto &CB = cast<CallBase>(getAnchorValue());
+      return !isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+          &CB, /*MustPreserveNullness=*/false);
+    }
+    case IRP_ARGUMENT: {
+      const Function *F = getAssociatedFunction();
+      assert(F && "no associated function for argument");
+      return !isCallableCC(F->getCallingConv());
+    }
+    }
+  }
+
+  bool isExternal() const {
+    const Function *F = getAssociatedFunction();
+    if (!F)
+      return true;
+    return isCallableCC(F->getCallingConv()) &&
+           getPositionKind() != IRP_CALL_SITE_RETURNED;
+  }
+
+  ChangeStatus updateNoAlias(Attributor &A) {
+    if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
+      return ChangeStatus::UNCHANGED;
+
+    // Try to use AANoAlias.
+    if (const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(
+            getIRPosition(), this, DepClassTy::REQUIRED)) {
+      if (ANoAlias->isKnownNoAlias()) {
+        addKnownBits(IS_NOALIAS);
+        return ChangeStatus::CHANGED;
+      }
+
+      if (!ANoAlias->isAssumedNoAlias()) {
+        removeAssumedBits(IS_NOALIAS);
+        return ChangeStatus::CHANGED;
+      }
+
+      return ChangeStatus::UNCHANGED;
+    }
+
+    // Try to infer noalias from argument attribute, since it is applicable for
+    // the duration of the function.
+    if (const Argument *Arg = getAssociatedArgument()) {
+      if (Arg->hasNoAliasAttr()) {
+        addKnownBits(IS_NOALIAS);
+        return ChangeStatus::UNCHANGED;
+      }
+
+      // Noalias information is not provided, and cannot be inferred,
+      // so we conservatively assume the pointer aliases.
+      removeAssumedBits(IS_NOALIAS);
+      return ChangeStatus::CHANGED;
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  ChangeStatus updateNoEffect(Attributor &A) {
+    if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT))
+      return ChangeStatus::UNCHANGED;
+
+    if (!getAssociatedFunction())
+      return indicatePessimisticFixpoint();
+
+    const auto HasNoEffectLoads = [&](const Use &U, bool &) {
+      const auto *LI = dyn_cast<LoadInst>(U.getUser());
+      return !LI || !LI->mayHaveSideEffects();
+    };
+    if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue()))
+      return indicatePessimisticFixpoint();
+
+    // Try to use AAMemoryBehavior to infer readonly attribute.
+    if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
+            getIRPosition(), this, DepClassTy::REQUIRED)) {
+      if (!AMemoryBehavior->isAssumedReadOnly())
+        return indicatePessimisticFixpoint();
+
+      if (AMemoryBehavior->isKnownReadOnly()) {
+        addKnownBits(IS_NOEFFECT);
+        return ChangeStatus::UNCHANGED;
+      }
+
+      return ChangeStatus::UNCHANGED;
+    }
+
+    if (const Argument *Arg = getAssociatedArgument()) {
+      if (Arg->onlyReadsMemory()) {
+        addKnownBits(IS_NOEFFECT);
+        return ChangeStatus::UNCHANGED;
+      }
+
+      // Readonly information is not provided, and cannot be inferred from
+      // AAMemoryBehavior.
+      return indicatePessimisticFixpoint();
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  ChangeStatus updateLocalInvariance(Attributor &A) {
+    if (isKnown(IS_LOCALLY_INVARIANT) || !isAssumed(IS_LOCALLY_INVARIANT))
+      return ChangeStatus::UNCHANGED;
+
+    // try to infer invariance from underlying objects
+    const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
+        getIRPosition(), this, DepClassTy::REQUIRED);
+    if (!AUO)
+      return ChangeStatus::UNCHANGED;
+
+    bool UsedAssumedInformation = false;
+    const auto IsLocallyInvariantLoadIfPointer = [&](const Value &V) {
+      if (!V.getType()->isPointerTy())
+        return true;
+      const auto *IsInvariantLoadPointer =
+          A.getOrCreateAAFor<AAInvariantLoadPointer>(
+              IRPosition::value(V), this, DepClassTy::REQUIRED);
+      // Conservatively fail if invariance cannot be inferred.
+      if (!IsInvariantLoadPointer)
+        return false;
+
+      if (IsInvariantLoadPointer->isKnownLocallyInvariant())
+        return true;
+      if (!IsInvariantLoadPointer->isAssumedLocallyInvariant())
+        return false;
+
+      UsedAssumedInformation = true;
+      return true;
+    };
+    if (!AUO->forallUnderlyingObjects(IsLocallyInvariantLoadIfPointer))
+      return indicatePessimisticFixpoint();
+
+    if (const auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
+      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+              CB, /*MustPreserveNullness=*/false)) {
+        for (const Value *Arg : CB->args()) {
+          if (!IsLocallyInvariantLoadIfPointer(*Arg))
+            return indicatePessimisticFixpoint();
+        }
+      }
+    }
+
+    if (!UsedAssumedInformation) {
+      // Pointer is known and not just assumed to be locally invariant.
+      addKnownBits(IS_LOCALLY_INVARIANT);
+      return ChangeStatus::CHANGED;
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+};
+
+struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerFloating(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+
+struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+
+  void initialize(Attributor &) override {
+    removeAssumedBits(IS_LOCALLY_CONSTRAINED);
+  }
+};
+
+struct AAInvariantLoadPointerCallSiteReturned final
+    : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    const Function *F = getAssociatedFunction();
+    assert(F && "no associated function for return from call");
+
+    if (!F->isDeclaration() && !F->isIntrinsic())
+      return AAInvariantLoadPointerImpl::initialize(A);
+
+    const auto &CB = cast<CallBase>(getAnchorValue());
+    if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+            &CB, /*MustPreserveNullness=*/false))
+      return AAInvariantLoadPointerImpl::initialize(A);
+
+    if (F->onlyReadsMemory() && F->hasNoSync())
+      return AAInvariantLoadPointerImpl::initialize(A);
+
+    // At this point, the function is opaque, so we conservatively assume
+    // non-invariance.
+    indicatePessimisticFixpoint();
+  }
+};
+
+struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+
+  void initialize(Attributor &) override {
+    const Function *F = getAssociatedFunction();
+    assert(F && "no associated function for argument");
+
+    if (!isCallableCC(F->getCallingConv())) {
+      addKnownBits(IS_LOCALLY_CONSTRAINED);
+      return;
+    }
+
+    if (!F->hasLocalLinkage())
+      removeAssumedBits(IS_LOCALLY_CONSTRAINED);
+  }
+};
+
+struct AAInvariantLoadPointerCallSiteArgument final
+    : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerCallSiteArgument(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+} // namespace
+
 /// ------------------------ Address Space ------------------------------------
 namespace {
 
@@ -13031,6 +13368,7 @@ const char AAInterFnReachability::ID = 0;
 const char AAPointerInfo::ID = 0;
 const char AAAssumptionInfo::ID = 0;
 const char AAUnderlyingObjects::ID = 0;
+const char AAInvariantLoadPointer::ID = 0;
 const char AAAddressSpace::ID = 0;
 const char AAAllocationInfo::ID = 0;
 const char AAIndirectCallInfo::ID = 0;
@@ -13165,6 +13503,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInvariantLoadPointer)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
 
diff --git a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
new file mode 100644
index 0000000000000..ace68a19bf41f
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
@@ -0,0 +1,382 @@
+; NOTE: Assertions have been
autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN + +@G = addrspace(1) global i32 zeroinitializer, align 4 +declare void @clobber(i32) #0 +declare ptr addrspace(1) @get_ptr() #0 +declare noalias ptr addrspace(1) @get_noalias_ptr() #0 +declare noalias ptr addrspace(1) @get_untouched_ptr() #1 + +define void @test_nonkernel(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define void @test_nonkernel( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6:[0-9]+]] +; AMDGCN-NEXT: ret void +; + %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; may not be !invariant.load, as the caller may modify %ptr + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_plain( +; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; may not be !invariant.load, as %ptr may alias a pointer in @clobber + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]] +; AMDGCN-NEXT: call void 
@clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %val = load i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_gep(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_gep( +; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4 +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %val = load i32, ptr addrspace(1) %gep, align 4 + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_noalias_gep(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_gep( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4 +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %val = load i32, ptr addrspace(1) %gep, align 4 + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_swap( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %val = load i32, ptr 
addrspace(1) %ptr, align 4 + ;; cannot be !invariant.load due to the write to %ptr + store i32 %swap, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %val = load volatile i32, ptr addrspace(1) %ptr, align 4 + ;; volatiles loads cannot be !invariant.load + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4 + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4 + ;; atomic loads with ordering guarantees may have side effects + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void 
@test_global() { +; AMDGCN-LABEL: define amdgpu_kernel void @test_global( +; AMDGCN-SAME: ) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %val = load i32, ptr addrspace(1) @G, align 4 + ;; is not an !invariant.load as global variables may change + call void @clobber(i32 %val) + ret void +} + +define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: ret i32 [[VAL]] +; + %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; is an !invariant.load due to its only caller @test_call_internal_noalias + ret i32 %val +} + +define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7:[0-9]+]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) + call void @clobber(i32 %val) + ret void +} + +define internal i32 @test_internal_load(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define internal i32 @test_internal_load( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: ret i32 [[VAL]] +; 
+ %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; may not be an !invariant.load since the pointer in @test_call_internal may alias + ret i32 %val +} + +define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal( +; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %val = call i32 @test_internal_load(ptr addrspace(1) %ptr) + call void @clobber(i32 %val) + ret void +} + +define internal i32 @test_internal_written(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define internal i32 @test_internal_written( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] { +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: ret i32 [[VAL]] +; + %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; cannot be an !invariant.load because of the write in caller @test_call_internal_written + ret i32 %val +} + +define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree captures(none) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7]] +; AMDGCN-NEXT: store i32 [[X]], ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %val = call i32 @test_internal_written(ptr addrspace(1) %ptr) + store i32 %x, ptr addrspace(1) %ptr + call void @clobber(i32 %val) + ret void 
+} + +define amdgpu_kernel void @test_call_ptr() { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr( +; AMDGCN-SAME: ) #[[ATTR2]] { +; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR6]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %ptr = call ptr addrspace(1) @get_ptr() + %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; may not be an !invariant.load since %ptr may alias + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_call_noalias_ptr() { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr( +; AMDGCN-SAME: ) #[[ATTR2]] { +; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_noalias_ptr() #[[ATTR6]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %ptr = call ptr addrspace(1) @get_noalias_ptr() + %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; may not be an !invariant.load since %ptr may have been written to before returning + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_call_untouched_ptr() { +; AMDGCN-LABEL: define amdgpu_kernel void @test_call_untouched_ptr( +; AMDGCN-SAME: ) #[[ATTR2]] { +; AMDGCN-NEXT: [[PTR:%.*]] = call noalias align 4 ptr addrspace(1) @get_untouched_ptr() #[[ATTR8:[0-9]+]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %ptr = call ptr addrspace(1) @get_untouched_ptr() + %val = load i32, ptr addrspace(1) %ptr, align 4 + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer( +; AMDGCN-SAME: ptr addrspace(1) nofree readonly 
captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR9:[0-9]+]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0) + %val = load i32, ptr addrspace(7) %rsrc, align 4 + ;; original %ptr may alias + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias( +; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR9]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0) + %val = load i32, ptr addrspace(7) %rsrc, align 4 + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load( +; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] +; AMDGCN-NEXT: 
[[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false + %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; either pointer yields an !invariant.load + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias( +; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; + %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false + %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; %ptr.false may alias, so no !invariant.load + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load( +; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[ENTRY:.*:]] +; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] +; AMDGCN: [[TRUE]]: +; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR6]] +; AMDGCN-NEXT: br label %[[FINISH:.*]] +; AMDGCN: [[FALSE]]: +; AMDGCN-NEXT: br label %[[FINISH]] +; 
AMDGCN: [[FINISH]]: +; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; +entry: + br i1 %cond, label %true, label %false +true: + call void @clobber(i32 1) + br label %finish +false: + br label %finish +finish: + %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ] + %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; either pointer yields an !invariant.load + call void @clobber(i32 %val) + ret void +} + +define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) { +; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias( +; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { +; AMDGCN-NEXT: [[ENTRY:.*:]] +; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] +; AMDGCN: [[TRUE]]: +; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR6]] +; AMDGCN-NEXT: br label %[[FINISH:.*]] +; AMDGCN: [[FALSE]]: +; AMDGCN-NEXT: br label %[[FINISH]] +; AMDGCN: [[FINISH]]: +; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] +; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 +; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] +; AMDGCN-NEXT: ret void +; +entry: + br i1 %cond, label %true, label %false +true: + call void @clobber(i32 1) + br label %finish +false: + br label %finish +finish: + %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ] + %val = load i32, ptr addrspace(1) %ptr, align 4 + ;; ptr.false may alias, so no !invariant.load + call void @clobber(i32 
%val) + ret void +} + +attributes #0 = { nofree norecurse nosync nounwind willreturn } +attributes #1 = { nofree norecurse nosync nounwind willreturn readonly } +;. +; AMDGCN: [[META0]] = !{} +;. diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll index 07e2d5ea15752..5bff2a2e6b208 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -207,7 +207,6 @@ define void @f7_1(ptr %ptr, i1 %cnd) { ; CHECK-LABEL: define {{[^@]+}}@f7_1 ; CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[PTR:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[A:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]] -; CHECK-NEXT: [[PTR_0:%.*]] = load i32, ptr [[PTR]], align 4 ; CHECK-NEXT: [[B:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[CND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK: if.true: diff --git a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll index 374d5ba7ff52b..4767244800d21 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll @@ -135,7 +135,7 @@ define internal %S @foo.1(ptr %foo.this) { ; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; TUNIT-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8 ; TUNIT-NEXT: call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR5:[0-9]+]] -; TUNIT-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 +; TUNIT-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]] ; TUNIT-NEXT: ret [[S]] 
[[FOO_RET]] ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) @@ -145,7 +145,7 @@ define internal %S @foo.1(ptr %foo.this) { ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8 ; CGSCC-NEXT: call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR6]] -; CGSCC-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 +; CGSCC-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]] ; CGSCC-NEXT: ret [[S]] [[FOO_RET]] ; entry: @@ -234,7 +234,7 @@ define internal %S @bar.5(ptr %this) { ; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; TUNIT-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 ; TUNIT-NEXT: call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR4]] -; TUNIT-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 +; TUNIT-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]] ; TUNIT-NEXT: ret [[S]] [[BAR_RET]] ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) @@ -244,7 +244,7 @@ define internal %S @bar.5(ptr %this) { ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 ; CGSCC-NEXT: call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR9:[0-9]+]] -; CGSCC-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 +; CGSCC-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]] ; CGSCC-NEXT: ret [[S]] [[BAR_RET]] ; 
entry: @@ -286,7 +286,7 @@ define internal void @boom(ptr %this, ptr %data) { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; TUNIT-NEXT: store ptr [[DATA]], ptr [[DATA_ADDR]], align 8 -; TUNIT-NEXT: [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8 +; TUNIT-NEXT: [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8, !invariant.load [[META8]] ; TUNIT-NEXT: store ptr [[V]], ptr [[THIS]], align 8 ; TUNIT-NEXT: ret void ; @@ -342,14 +342,6 @@ define %S.2 @t3.helper() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RETVAL:%.*]] = alloca [[S_2:%.*]], align 8 ; CHECK-NEXT: call void @ext1(ptr noundef nonnull align 8 dereferenceable(24) [[RETVAL]]) -; CHECK-NEXT: [[DOTFCA_0_LOAD:%.*]] = load ptr, ptr [[RETVAL]], align 8 -; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[S_2]] poison, ptr [[DOTFCA_0_LOAD]], 0 -; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 1 -; CHECK-NEXT: [[DOTFCA_1_LOAD:%.*]] = load i64, ptr [[DOTFCA_1_GEP]], align 8 -; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_0_INSERT]], i64 [[DOTFCA_1_LOAD]], 1 -; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 2 -; CHECK-NEXT: [[DOTFCA_2_LOAD:%.*]] = load i64, ptr [[DOTFCA_2_GEP]], align 8 -; CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_1_INSERT]], i64 [[DOTFCA_2_LOAD]], 2 ; CHECK-NEXT: ret [[S_2]] zeroinitializer ; entry: @@ -508,7 +500,7 @@ define internal %S @t4a(ptr %this) { ; CGSCC-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 ; CGSCC-NEXT: call void @t4b(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]] -; CGSCC-NEXT: [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8 +; CGSCC-NEXT: [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load 
[[META8]] ; CGSCC-NEXT: ret [[S]] [[TMP0]] ; entry: @@ -623,6 +615,7 @@ entry: ; TUNIT: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; TUNIT: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2} ; TUNIT: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} +; TUNIT: [[META8]] = !{} ;. ; CGSCC: [[META0:![0-9]+]] = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 5]} ; CGSCC: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} @@ -632,4 +625,5 @@ entry: ; CGSCC: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; CGSCC: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2} ; CGSCC: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} +; CGSCC: [[META8]] = !{} ;.