diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index fea2ede8b5ab4..03a49cf22bcdd 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -393,8 +393,6 @@ class MemoryDepChecker { uint64_t MaxStride; std::optional CommonStride; - bool ShouldRetryWithRuntimeCheck; - /// TypeByteSize is either the common store size of both accesses, or 0 when /// store sizes mismatch. uint64_t TypeByteSize; @@ -404,11 +402,9 @@ class MemoryDepChecker { DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride, std::optional CommonStride, - bool ShouldRetryWithRuntimeCheck, uint64_t TypeByteSize, bool AIsWrite, bool BIsWrite) : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride), - ShouldRetryWithRuntimeCheck(ShouldRetryWithRuntimeCheck), TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} }; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index f377da3926b26..9dff5bb4af5fa 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1979,14 +1979,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( if (StrideAScaled == StrideBScaled) CommonStride = StrideAScaled; - // TODO: Historically, we don't retry with runtime checks unless the - // (unscaled) strides are the same. Fix this once the condition for runtime - // checks in isDependent is fixed. - bool ShouldRetryWithRuntimeCheck = StrideAPtrInt == StrideBPtrInt; + // TODO: FoundNonConstantDistanceDependence is used as a necessary condition + // to consider retrying with runtime checks. Historically, we did not set it + // when (unscaled) strides were different but there is no inherent reason to. + if (!isa(Dist)) + FoundNonConstantDistanceDependence |= StrideAPtrInt == StrideBPtrInt; return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride, - ShouldRetryWithRuntimeCheck, TypeByteSize, - AIsWrite, BIsWrite); + TypeByteSize, AIsWrite, BIsWrite); } MemoryDepChecker::Dependence::DepType @@ -2001,15 +2001,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (std::holds_alternative(Res)) return std::get(Res); - auto &[Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck, - TypeByteSize, AIsWrite, BIsWrite] = + auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] = std::get(Res); bool HasSameSize = TypeByteSize > 0; if (isa(Dist)) { - // TODO: Relax requirement that there is a common unscaled stride to retry - // with non-constant distance dependencies. - FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n"); return Dependence::Unknown; } @@ -2068,14 +2064,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // forward dependency will allow vectorization using any width. if (IsTrueDataDependence && EnableForwardingConflictDetection) { - if (!ConstDist) { - // TODO: FoundNonConstantDistanceDependence is used as a necessary - // condition to consider retrying with runtime checks. Historically, we - // did not set it when strides were different but there is no inherent - // reason to. - FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; + if (!ConstDist) return Dependence::Unknown; - } if (!HasSameSize || couldPreventStoreLoadForward( ConstDist->abs().getZExtValue(), TypeByteSize)) { LLVM_DEBUG( @@ -2090,22 +2080,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue(); // Below we only handle strictly positive distances. - if (MinDistance <= 0) { - FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; + if (MinDistance <= 0) return Dependence::Unknown; - } - - if (!ConstDist) { - // Previously this case would be treated as Unknown, possibly setting - // FoundNonConstantDistanceDependence to force re-trying with runtime - // checks. Until the TODO below is addressed, set it here to preserve - // original behavior w.r.t. re-trying with runtime checks. - // TODO: FoundNonConstantDistanceDependence is used as a necessary - // condition to consider retrying with runtime checks. Historically, we - // did not set it when strides were different but there is no inherent - // reason to. - FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; - } if (!HasSameSize) { LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with " diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll index 595fb021ec0ab..63abd4ef70d63 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll @@ -320,28 +320,34 @@ exit: define void @retry_after_dep_check_with_unknown_offset(ptr %A, i32 %offset) { ; CHECK-LABEL: 'retry_after_dep_check_with_unknown_offset' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe with run-time checks ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %l.A = load float, ptr %A, align 4 -> -; CHECK-NEXT: store float 0.000000e+00, ptr %A.100.iv.offset.3, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %l.A = load float, ptr %A, align 4 -> -; CHECK-NEXT: store float %l.A, ptr %A.100.iv, align 8 -; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3 +; CHECK-NEXT: Check 1: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv +; CHECK-NEXT: Against group GRP2: +; CHECK-NEXT: ptr %A +; CHECK-NEXT: Check 2: +; CHECK-NEXT: Comparing group GRP1: +; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3 +; CHECK-NEXT: Against group GRP2: +; CHECK-NEXT: ptr %A ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: -; CHECK-NEXT: (Low: %A High: (4 + %A)) -; CHECK-NEXT: Member: %A +; CHECK-NEXT: (Low: (100 + %A) High: (96 + (8 * (zext i32 %offset to i64)) + %A)) +; CHECK-NEXT: Member: {(100 + %A),+,8}<%loop> ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: (100 + (8 * (zext i32 %offset to i64)) + %A) High: (96 + (16 * (zext i32 %offset to i64)) + %A)) ; CHECK-NEXT: Member: {(100 + (8 * (zext i32 %offset to i64)) + %A),+,8}<%loop> ; CHECK-NEXT: Group GRP2: -; CHECK-NEXT: (Low: (100 + %A) High: (96 + (8 * (zext i32 %offset to i64)) + %A)) -; CHECK-NEXT: Member: {(100 + %A),+,8}<%loop> +; CHECK-NEXT: (Low: %A High: (4 + %A)) +; CHECK-NEXT: Member: %A ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: