From 0145c57901a24c291c0039d4211e9ecd87137684 Mon Sep 17 00:00:00 2001 From: Kolya Panchenko Date: Fri, 23 May 2025 15:41:39 -0700 Subject: [PATCH 1/8] [ConstantFolding] Add folding for [de]interleave2, insert and extract The change adds folding for 4 vector intrinsics: `interleave2`, `deinterleave2`, `vector_extract` and `vector_insert`. For the last 2 intrinsics the change does not use `ShuffleVector` fold mechanism as it's much simpler to construct result vector explicitly. --- llvm/lib/Analysis/ConstantFolding.cpp | 78 +++++++++++++++++++ .../InstSimplify/ConstProp/vector-calls.ll | 50 ++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 7dd7f413783c9..cb5eb48e8e5a7 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1635,6 +1635,10 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::vector_reduce_smax: case Intrinsic::vector_reduce_umin: case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_extract: + case Intrinsic::vector_insert: + case Intrinsic::vector_interleave2: + case Intrinsic::vector_deinterleave2: // Target intrinsics case Intrinsic::amdgcn_perm: case Intrinsic::amdgcn_wave_reduce_umin: @@ -3750,6 +3754,65 @@ static Constant *ConstantFoldFixedVectorCall( } return nullptr; } + case Intrinsic::vector_extract: { + auto *Vec = dyn_cast(Operands[0]); + auto *Idx = dyn_cast(Operands[1]); + if (!Vec || !Idx) + return nullptr; + + unsigned NumElements = FVTy->getNumElements(); + unsigned VecNumElements = + cast(Vec->getType())->getNumElements(); + // Extracting entire vector is nop + if (NumElements == VecNumElements) + return Vec; + + unsigned StartingIndex = Idx->getZExtValue(); + assert(StartingIndex + NumElements <= VecNumElements && + "Cannot extract more elements than exist in the vector"); + for (unsigned I = 0; I != NumElements; ++I) + Result[I] = Vec->getAggregateElement(StartingIndex + I); + return ConstantVector::get(Result); + } + case Intrinsic::vector_insert: { + auto *Vec = dyn_cast(Operands[0]); + auto *SubVec = dyn_cast(Operands[1]); + auto *Idx = dyn_cast(Operands[2]); + if (!Vec || !SubVec || !Idx) + return nullptr; + + unsigned SubVecNumElements = + cast(SubVec->getType())->getNumElements(); + unsigned VecNumElements = + cast(Vec->getType())->getNumElements(); + unsigned IdxN = Idx->getZExtValue(); + // Replacing entire vector with a subvec is nop + if (SubVecNumElements == VecNumElements) + return SubVec; + + unsigned I = 0; + for (; I < IdxN; ++I) + Result[I] = Vec->getAggregateElement(I); + for (; I < IdxN + SubVecNumElements; ++I) + Result[I] = SubVec->getAggregateElement(I - IdxN); + for (; I < VecNumElements; ++I) + Result[I] = Vec->getAggregateElement(I); + return ConstantVector::get(Result); + } + case Intrinsic::vector_interleave2: { + auto *Vec0 = dyn_cast(Operands[0]); + auto *Vec1 = dyn_cast(Operands[1]); + if (!Vec0 || !Vec1) + return nullptr; + + unsigned NumElements = + cast(Vec0->getType())->getNumElements(); + for (unsigned I = 0; I < NumElements; ++I) { + Result[2 * I] = Vec0->getAggregateElement(I); + Result[2 * I + 1] = Vec1->getAggregateElement(I); + } + return ConstantVector::get(Result); + } default: break; } @@ -3911,6 +3974,21 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID, return nullptr; return ConstantStruct::get(StTy, SinResult, CosResult); } + case Intrinsic::vector_deinterleave2: { + auto *Vec = dyn_cast(Operands[0]); + if (!Vec) + return nullptr; + + unsigned NumElements = + cast(Vec->getType())->getNumElements() / 2; + SmallVector Res0(NumElements), Res1(NumElements); + for (unsigned I = 0; I < NumElements; ++I) { + Res0[I] = Vec->getAggregateElement(2 * I); + Res1[I] = Vec->getAggregateElement(2 * I + 1); + } + return ConstantStruct::get(StTy, ConstantVector::get(Res0), + ConstantVector::get(Res1)); + } default: // TODO: Constant folding of vector intrinsics that fall through here does // not work (e.g. overflow intrinsics) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll new file mode 100644 index 0000000000000..f0bf610fa52aa --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instsimplify,verify -S | FileCheck %s + +define <3 x i32> @fold_vector_extract() { +; CHECK-LABEL: define <3 x i32> @fold_vector_extract() { +; CHECK-NEXT: ret <3 x i32> +; + %1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> , i64 3) + ret <3 x i32> %1 +} + +define <8 x i32> @fold_vector_extract_nop() { +; CHECK-LABEL: define <8 x i32> @fold_vector_extract_nop() { +; CHECK-NEXT: ret <8 x i32> +; + %1 = call <8 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> , i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @fold_vector_insert() { +; CHECK-LABEL: define <8 x i32> @fold_vector_insert() { +; CHECK-NEXT: ret <8 x i32> +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> , <4 x i32> , i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @fold_vector_insert_nop() { +; CHECK-LABEL: define <8 x i32> @fold_vector_insert_nop() { +; CHECK-NEXT: ret <8 x i32> +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> , <8 x i32> , i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @fold_vector_interleave2() { +; CHECK-LABEL: define <8 x i32> @fold_vector_interleave2() { +; CHECK-NEXT: ret <8 x i32> +; + %1 = call<8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> , <4 x i32> ) + ret <8 x i32> %1 +} + +define {<4 x i32>, <4 x i32>} @fold_vector_deinterleav2() { +; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @fold_vector_deinterleav2() { +; CHECK-NEXT: ret { <4 x i32>, <4 x i32> } { <4 x i32> , <4 x i32> } +; + %1 = call {<4 x i32>, <4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<8 x i32> ) + ret {<4 x i32>, <4 x i32>} %1 +} From 6a37fdcb3d6ef663319a4546604353a0da7f53a7 Mon Sep 17 00:00:00 2001 From: Kolya Panchenko Date: Tue, 27 May 2025 11:03:04 -0400 Subject: [PATCH 2/8] Addressed comments --- llvm/lib/Analysis/ConstantFolding.cpp | 29 +++++++++---- .../InstSimplify/ConstProp/vector-calls.ll | 42 ++++++++++++++++++- 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index cb5eb48e8e5a7..f64a4c9a17f7e 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -3763,15 +3763,21 @@ static Constant *ConstantFoldFixedVectorCall( unsigned NumElements = FVTy->getNumElements(); unsigned VecNumElements = cast(Vec->getType())->getNumElements(); + unsigned StartingIndex = Idx->getZExtValue(); // Extracting entire vector is nop - if (NumElements == VecNumElements) + if (NumElements == VecNumElements && StartingIndex == 0) return Vec; - unsigned StartingIndex = Idx->getZExtValue(); - assert(StartingIndex + NumElements <= VecNumElements && - "Cannot extract more elements than exist in the vector"); - for (unsigned I = 0; I != NumElements; ++I) - Result[I] = Vec->getAggregateElement(StartingIndex + I); + const unsigned NonPoisonNumElements = + std::min(StartingIndex + NumElements, VecNumElements); + for (unsigned I = StartingIndex; I < NonPoisonNumElements; ++I) + Result[I - StartingIndex] = Vec->getAggregateElement(I); + + // Remaining elements are poison since they are out of bounds. + for (unsigned I = NonPoisonNumElements, E = StartingIndex + NumElements; + I < E; ++I) + Result[I - StartingIndex] = PoisonValue::get(FVTy->getElementType()); + return ConstantVector::get(Result); } case Intrinsic::vector_insert: { @@ -3787,9 +3793,15 @@ static Constant *ConstantFoldFixedVectorCall( cast(Vec->getType())->getNumElements(); unsigned IdxN = Idx->getZExtValue(); // Replacing entire vector with a subvec is nop - if (SubVecNumElements == VecNumElements) + if (SubVecNumElements == VecNumElements && IdxN == 0) return SubVec; + // Make sure indices are in the range [0, VecNumElements), otherwise the + // result is a poison value. + if (IdxN >= VecNumElements || IdxN + SubVecNumElements > VecNumElements || + (IdxN && (SubVecNumElements % IdxN) != 0)) + return PoisonValue::get(FVTy); + unsigned I = 0; for (; I < IdxN; ++I) Result[I] = Vec->getAggregateElement(I); @@ -3980,7 +3992,8 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID, return nullptr; unsigned NumElements = - cast(Vec->getType())->getNumElements() / 2; + cast(Vec->getType())->getElementCount().getKnownMinValue() / + 2; SmallVector Res0(NumElements), Res1(NumElements); for (unsigned I = 0; I < NumElements; ++I) { Res0[I] = Vec->getAggregateElement(2 * I); diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll index f0bf610fa52aa..38c35f28cd11b 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=instsimplify,verify -S | FileCheck %s +; RUN: opt < %s -passes=instsimplify,verify -disable-verify -S | FileCheck %s define <3 x i32> @fold_vector_extract() { ; CHECK-LABEL: define <3 x i32> @fold_vector_extract() { @@ -9,6 +9,22 @@ define <3 x i32> @fold_vector_extract() { ret <3 x i32> %1 } +define <3 x i32> @fold_vector_extract_last_poison() { +; CHECK-LABEL: define <3 x i32> @fold_vector_extract_last_poison() { +; CHECK-NEXT: ret <3 x i32> +; + %1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> , i64 6) + ret <3 x i32> %1 +} + +define <3 x i32> @fold_vector_extract_poison() { +; CHECK-LABEL: define <3 x i32> @fold_vector_extract_poison() { +; CHECK-NEXT: ret <3 x i32> poison +; + %1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> , i64 8) + ret <3 x i32> %1 +} + define <8 x i32> @fold_vector_extract_nop() { ; CHECK-LABEL: define <8 x i32> @fold_vector_extract_nop() { ; CHECK-NEXT: ret <8 x i32> @@ -33,6 +49,22 @@ define <8 x i32> @fold_vector_insert_nop() { ret <8 x i32> %1 } +define <8 x i32> @fold_vector_insert_poison_idx_range() { +; CHECK-LABEL: define <8 x i32> @fold_vector_insert_poison_idx_range() { +; CHECK-NEXT: ret <8 x i32> poison +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> , <6 x i32> , i64 6) + ret <8 x i32> %1 +} + +define <8 x i32> @fold_vector_insert_poison_large_idx() { +; CHECK-LABEL: define <8 x i32> @fold_vector_insert_poison_large_idx() { +; CHECK-NEXT: ret <8 x i32> poison +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> , <6 x i32> , i64 -2) + ret <8 x i32> %1 +} + define <8 x i32> @fold_vector_interleave2() { ; CHECK-LABEL: define <8 x i32> @fold_vector_interleave2() { ; CHECK-NEXT: ret <8 x i32> @@ -48,3 +80,11 @@ define {<4 x i32>, <4 x i32>} @fold_vector_deinterleav2() { %1 = call {<4 x i32>, <4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<8 x i32> ) ret {<4 x i32>, <4 x i32>} %1 } + +define {, } @fold_scalable_vector_deinterleav2() { +; CHECK-LABEL: define { , } @fold_scalable_vector_deinterleav2() { +; CHECK-NEXT: ret { , } zeroinitializer +; + %1 = call {, } @llvm.vector.deinterleave2.v4i32.v8i32( zeroinitializer) + ret {, } %1 +} From 6c34f443a66a0b532ad9077ea44587b23aaba7f8 Mon Sep 17 00:00:00 2001 From: Kolya Panchenko Date: Wed, 28 May 2025 13:15:26 -0400 Subject: [PATCH 3/8] Addressed comments --- llvm/lib/Analysis/ConstantFolding.cpp | 55 ++++++++++++++----- .../InstSimplify/ConstProp/vector-calls.ll | 10 ++++ 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index f64a4c9a17f7e..eb0f520958ba5 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -3756,22 +3756,27 @@ static Constant *ConstantFoldFixedVectorCall( } case Intrinsic::vector_extract: { auto *Vec = dyn_cast(Operands[0]); - auto *Idx = dyn_cast(Operands[1]); - if (!Vec || !Idx) + auto *Idx = cast(Operands[1]); + if (!Vec || !Idx || !isa(Vec->getType())) return nullptr; unsigned NumElements = FVTy->getNumElements(); unsigned VecNumElements = cast(Vec->getType())->getNumElements(); unsigned StartingIndex = Idx->getZExtValue(); + // Extracting entire vector is nop if (NumElements == VecNumElements && StartingIndex == 0) return Vec; const unsigned NonPoisonNumElements = std::min(StartingIndex + NumElements, VecNumElements); - for (unsigned I = StartingIndex; I < NonPoisonNumElements; ++I) - Result[I - StartingIndex] = Vec->getAggregateElement(I); + for (unsigned I = StartingIndex; I < NonPoisonNumElements; ++I) { + Constant *Elt = Vec->getAggregateElement(I); + if (!Elt) + return nullptr; + Result[I - StartingIndex] = Elt; + } // Remaining elements are poison since they are out of bounds. for (unsigned I = NonPoisonNumElements, E = StartingIndex + NumElements; @@ -3784,7 +3789,7 @@ static Constant *ConstantFoldFixedVectorCall( auto *Vec = dyn_cast(Operands[0]); auto *SubVec = dyn_cast(Operands[1]); auto *Idx = dyn_cast(Operands[2]); - if (!Vec || !SubVec || !Idx) + if (!Vec || !SubVec || !Idx || !isa(Vec->getType())) return nullptr; unsigned SubVecNumElements = @@ -3803,12 +3808,24 @@ static Constant *ConstantFoldFixedVectorCall( return PoisonValue::get(FVTy); unsigned I = 0; - for (; I < IdxN; ++I) - Result[I] = Vec->getAggregateElement(I); - for (; I < IdxN + SubVecNumElements; ++I) - Result[I] = SubVec->getAggregateElement(I - IdxN); - for (; I < VecNumElements; ++I) - Result[I] = Vec->getAggregateElement(I); + for (; I < IdxN; ++I) { + Constant *Elt = Vec->getAggregateElement(I); + if (!Elt) + return nullptr; + Result[I] = Elt; + } + for (; I < IdxN + SubVecNumElements; ++I) { + Constant *Elt = SubVec->getAggregateElement(I - IdxN); + if (!Elt) + return nullptr; + Result[I] = Elt; + } + for (; I < VecNumElements; ++I) { + Constant *Elt = Vec->getAggregateElement(I); + if (!Elt) + return nullptr; + Result[I] = Elt; + } return ConstantVector::get(Result); } case Intrinsic::vector_interleave2: { @@ -3820,8 +3837,12 @@ static Constant *ConstantFoldFixedVectorCall( unsigned NumElements = cast(Vec0->getType())->getNumElements(); for (unsigned I = 0; I < NumElements; ++I) { - Result[2 * I] = Vec0->getAggregateElement(I); - Result[2 * I + 1] = Vec1->getAggregateElement(I); + Constant *Elt0 = Vec0->getAggregateElement(I); + Constant *Elt1 = Vec1->getAggregateElement(I); + if (!Elt0 || !Elt1) + return nullptr; + Result[2 * I] = Elt0; + Result[2 * I + 1] = Elt1; } return ConstantVector::get(Result); } @@ -3996,8 +4017,12 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID, 2; SmallVector Res0(NumElements), Res1(NumElements); for (unsigned I = 0; I < NumElements; ++I) { - Res0[I] = Vec->getAggregateElement(2 * I); - Res1[I] = Vec->getAggregateElement(2 * I + 1); + Constant *Elt0 = Vec->getAggregateElement(2 * I); + Constant *Elt1 = Vec->getAggregateElement(2 * I + 1); + if (!Elt0 || !Elt1) + return nullptr; + Res0[I] = Elt0; + Res1[I] = Elt1; } return ConstantStruct::get(StTy, ConstantVector::get(Res0), ConstantVector::get(Res1)); diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll index 38c35f28cd11b..85bd0da6e48a0 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll @@ -9,6 +9,16 @@ define <3 x i32> @fold_vector_extract() { ret <3 x i32> %1 } +@a = external global i16, align 1 + +define <3 x i32> @fold_vector_extract_constexpr() { +; CHECK-LABEL: define <3 x i32> @fold_vector_extract_constexpr() { +; CHECK-NEXT: ret <3 x i32> +; + %1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> , i64 0) + ret <3 x i32> %1 +} + define <3 x i32> @fold_vector_extract_last_poison() { ; CHECK-LABEL: define <3 x i32> @fold_vector_extract_last_poison() { ; CHECK-NEXT: ret <3 x i32> From 1145582694bddfc452b363178ab235333c176a18 Mon Sep 17 00:00:00 2001 From: Kolya Panchenko Date: Mon, 2 Jun 2025 14:40:13 -0400 Subject: [PATCH 4/8] Addressed latest comments --- llvm/lib/Analysis/ConstantFolding.cpp | 46 +++++++------------ .../InstSimplify/ConstProp/vector-calls.ll | 2 +- 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index eb0f520958ba5..ccadbdba40b4c 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -3755,9 +3755,9 @@ static Constant *ConstantFoldFixedVectorCall( return nullptr; } case Intrinsic::vector_extract: { - auto *Vec = dyn_cast(Operands[0]); - auto *Idx = cast(Operands[1]); - if (!Vec || !Idx || !isa(Vec->getType())) + auto *Idx = dyn_cast(Operands[1]); + Constant *Vec = Operands[0]; + if (!Idx || !isa(Vec->getType())) return nullptr; unsigned NumElements = FVTy->getNumElements(); @@ -3786,10 +3786,10 @@ static Constant *ConstantFoldFixedVectorCall( return ConstantVector::get(Result); } case Intrinsic::vector_insert: { - auto *Vec = dyn_cast(Operands[0]); - auto *SubVec = dyn_cast(Operands[1]); + Constant *Vec = Operands[0]; + Constant *SubVec = Operands[1]; auto *Idx = dyn_cast(Operands[2]); - if (!Vec || !SubVec || !Idx || !isa(Vec->getType())) + if (!Idx || !isa(Vec->getType())) return nullptr; unsigned SubVecNumElements = @@ -3804,24 +3804,15 @@ static Constant *ConstantFoldFixedVectorCall( // Make sure indices are in the range [0, VecNumElements), otherwise the // result is a poison value. if (IdxN >= VecNumElements || IdxN + SubVecNumElements > VecNumElements || - (IdxN && (SubVecNumElements % IdxN) != 0)) + (IdxN % SubVecNumElements) != 0) return PoisonValue::get(FVTy); - unsigned I = 0; - for (; I < IdxN; ++I) { - Constant *Elt = Vec->getAggregateElement(I); - if (!Elt) - return nullptr; - Result[I] = Elt; - } - for (; I < IdxN + SubVecNumElements; ++I) { - Constant *Elt = SubVec->getAggregateElement(I - IdxN); - if (!Elt) - return nullptr; - Result[I] = Elt; - } - for (; I < VecNumElements; ++I) { - Constant *Elt = Vec->getAggregateElement(I); + for (unsigned I = 0; I < VecNumElements; ++I) { + Constant *Elt; + if (I >= IdxN && I < IdxN + SubVecNumElements) + Elt = SubVec->getAggregateElement(I - IdxN); + else + Elt = Vec->getAggregateElement(I); if (!Elt) return nullptr; Result[I] = Elt; @@ -3829,16 +3820,11 @@ static Constant *ConstantFoldFixedVectorCall( return ConstantVector::get(Result); } case Intrinsic::vector_interleave2: { - auto *Vec0 = dyn_cast(Operands[0]); - auto *Vec1 = dyn_cast(Operands[1]); - if (!Vec0 || !Vec1) - return nullptr; - unsigned NumElements = - cast(Vec0->getType())->getNumElements(); + cast(Operands[0]->getType())->getNumElements(); for (unsigned I = 0; I < NumElements; ++I) { - Constant *Elt0 = Vec0->getAggregateElement(I); - Constant *Elt1 = Vec1->getAggregateElement(I); + Constant *Elt0 = Operands[0]->getAggregateElement(I); + Constant *Elt1 = Operands[1]->getAggregateElement(I); if (!Elt0 || !Elt1) return nullptr; Result[2 * I] = Elt0; diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll index 85bd0da6e48a0..73dad53a3b653 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll @@ -79,7 +79,7 @@ define <8 x i32> @fold_vector_interleave2() { ; CHECK-LABEL: define <8 x i32> @fold_vector_interleave2() { ; CHECK-NEXT: ret <8 x i32> ; - %1 = call<8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> , <4 x i32> ) + %1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> , <4 x i32> ) ret <8 x i32> %1 } From 386bb29bbc66f04c12830ab936b4e15fea272c58 Mon Sep 17 00:00:00 2001 From: Kolya Panchenko Date: Tue, 10 Jun 2025 16:01:37 -0400 Subject: [PATCH 5/8] Explicitly handle ConstantAggregateZero --- llvm/lib/Analysis/ConstantFolding.cpp | 32 +++++++++++++++++---------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index ccadbdba40b4c..5a1e23e33ec5c 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -3998,20 +3998,28 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID, if (!Vec) return nullptr; - unsigned NumElements = - cast(Vec->getType())->getElementCount().getKnownMinValue() / - 2; + auto VecTy = cast(Vec->getType()); + unsigned NumElements = VecTy->getElementCount().getKnownMinValue() / 2; + SmallVector Res0(NumElements), Res1(NumElements); - for (unsigned I = 0; I < NumElements; ++I) { - Constant *Elt0 = Vec->getAggregateElement(2 * I); - Constant *Elt1 = Vec->getAggregateElement(2 * I + 1); - if (!Elt0 || !Elt1) - return nullptr; - Res0[I] = Elt0; - Res1[I] = Elt1; + if (isa(Vec)) { + auto *HalfVecTy = VectorType::getHalfElementsVectorType(VecTy); + return ConstantStruct::get(StTy, ConstantAggregateZero::get(HalfVecTy), + ConstantAggregateZero::get(HalfVecTy)); } - return ConstantStruct::get(StTy, ConstantVector::get(Res0), - ConstantVector::get(Res1)); + if (isa(Vec->getType())) { + for (unsigned I = 0; I < NumElements; ++I) { + Constant *Elt0 = Vec->getAggregateElement(2 * I); + Constant *Elt1 = Vec->getAggregateElement(2 * I + 1); + if (!Elt0 || !Elt1) + return nullptr; + Res0[I] = Elt0; + Res1[I] = Elt1; + } + return ConstantStruct::get(StTy, ConstantVector::get(Res0), + ConstantVector::get(Res1)); + } + return nullptr; } default: // TODO: Constant folding of vector intrinsics that fall through here does From 77952a586d90bfec8e8cd68029e8c88151e3ba66 Mon Sep 17 00:00:00 2001 From: Kolya Panchenko Date: Tue, 10 Jun 2025 18:51:04 -0400 Subject: [PATCH 6/8] addressed comments --- llvm/lib/Analysis/ConstantFolding.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 5a1e23e33ec5c..21ebe42465b66 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -3998,16 +3998,15 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID, if (!Vec) return nullptr; - auto VecTy = cast(Vec->getType()); + auto *VecTy = cast(Vec->getType()); unsigned NumElements = VecTy->getElementCount().getKnownMinValue() / 2; - - SmallVector Res0(NumElements), Res1(NumElements); if (isa(Vec)) { auto *HalfVecTy = VectorType::getHalfElementsVectorType(VecTy); return ConstantStruct::get(StTy, ConstantAggregateZero::get(HalfVecTy), ConstantAggregateZero::get(HalfVecTy)); } if (isa(Vec->getType())) { + SmallVector Res0(NumElements), Res1(NumElements); for (unsigned I = 0; I < NumElements; ++I) { Constant *Elt0 = Vec->getAggregateElement(2 * I); Constant *Elt1 = Vec->getAggregateElement(2 * I + 1); From f64de79afe95ffdc5c27cd7703f9f008f9068b42 Mon Sep 17 00:00:00 2001 From: Kolya Panchenko Date: Wed, 11 Jun 2025 15:28:45 -0400 Subject: [PATCH 7/8] Removed handling of invalid cases --- llvm/lib/Analysis/ConstantFolding.cpp | 18 ++-------- .../InstSimplify/ConstProp/vector-calls.ll | 34 +------------------ 2 files changed, 4 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 21ebe42465b66..72874a7c8e531 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -3769,20 +3769,14 @@ static Constant *ConstantFoldFixedVectorCall( if (NumElements == VecNumElements && StartingIndex == 0) return Vec; - const unsigned NonPoisonNumElements = - std::min(StartingIndex + NumElements, VecNumElements); - for (unsigned I = StartingIndex; I < NonPoisonNumElements; ++I) { + for (unsigned I = StartingIndex, E = StartingIndex + NumElements; I < E; + ++I) { Constant *Elt = Vec->getAggregateElement(I); if (!Elt) return nullptr; Result[I - StartingIndex] = Elt; } - // Remaining elements are poison since they are out of bounds. - for (unsigned I = NonPoisonNumElements, E = StartingIndex + NumElements; - I < E; ++I) - Result[I - StartingIndex] = PoisonValue::get(FVTy->getElementType()); - return ConstantVector::get(Result); } case Intrinsic::vector_insert: { @@ -3801,15 +3795,9 @@ static Constant *ConstantFoldFixedVectorCall( if (SubVecNumElements == VecNumElements && IdxN == 0) return SubVec; - // Make sure indices are in the range [0, VecNumElements), otherwise the - // result is a poison value. - if (IdxN >= VecNumElements || IdxN + SubVecNumElements > VecNumElements || - (IdxN % SubVecNumElements) != 0) - return PoisonValue::get(FVTy); - for (unsigned I = 0; I < VecNumElements; ++I) { Constant *Elt; - if (I >= IdxN && I < IdxN + SubVecNumElements) + if (I < IdxN + SubVecNumElements) Elt = SubVec->getAggregateElement(I - IdxN); else Elt = Vec->getAggregateElement(I); diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll index 73dad53a3b653..c61d1c39a6205 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=instsimplify,verify -disable-verify -S | FileCheck %s +; RUN: opt < %s -passes=instsimplify,verify -S | FileCheck %s define <3 x i32> @fold_vector_extract() { ; CHECK-LABEL: define <3 x i32> @fold_vector_extract() { @@ -19,22 +19,6 @@ define <3 x i32> @fold_vector_extract_constexpr() { ret <3 x i32> %1 } -define <3 x i32> @fold_vector_extract_last_poison() { -; CHECK-LABEL: define <3 x i32> @fold_vector_extract_last_poison() { -; CHECK-NEXT: ret <3 x i32> -; - %1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> , i64 6) - ret <3 x i32> %1 -} - -define <3 x i32> @fold_vector_extract_poison() { -; CHECK-LABEL: define <3 x i32> @fold_vector_extract_poison() { -; CHECK-NEXT: ret <3 x i32> poison -; - %1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> , i64 8) - ret <3 x i32> %1 -} - define <8 x i32> @fold_vector_extract_nop() { ; CHECK-LABEL: define <8 x i32> @fold_vector_extract_nop() { ; CHECK-NEXT: ret <8 x i32> @@ -59,22 +43,6 @@ define <8 x i32> @fold_vector_insert_nop() { ret <8 x i32> %1 } -define <8 x i32> @fold_vector_insert_poison_idx_range() { -; CHECK-LABEL: define <8 x i32> @fold_vector_insert_poison_idx_range() { -; CHECK-NEXT: ret <8 x i32> poison -; - %1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> , <6 x i32> , i64 6) - ret <8 x i32> %1 -} - -define <8 x i32> @fold_vector_insert_poison_large_idx() { -; CHECK-LABEL: define <8 x i32> @fold_vector_insert_poison_large_idx() { -; CHECK-NEXT: ret <8 x i32> poison -; - %1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> , <6 x i32> , i64 -2) - ret <8 x i32> %1 -} - define <8 x i32> @fold_vector_interleave2() { ; CHECK-LABEL: define <8 x i32> @fold_vector_interleave2() { ; CHECK-NEXT: ret <8 x i32> From ab1abeb5911da843a822e874e3d954fe63f5cf72 Mon Sep 17 00:00:00 2001 From: Kolya Panchenko Date: Wed, 11 Jun 2025 18:43:23 -0400 Subject: [PATCH 8/8] fixed typos --- .../Transforms/InstSimplify/ConstProp/vector-calls.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll index c61d1c39a6205..9dbe3d4e50ee1 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll @@ -51,16 +51,16 @@ define <8 x i32> @fold_vector_interleave2() { ret <8 x i32> %1 } -define {<4 x i32>, <4 x i32>} @fold_vector_deinterleav2() { -; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @fold_vector_deinterleav2() { +define {<4 x i32>, <4 x i32>} @fold_vector_deinterleave2() { +; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @fold_vector_deinterleave2() { ; CHECK-NEXT: ret { <4 x i32>, <4 x i32> } { <4 x i32> , <4 x i32> } ; %1 = call {<4 x i32>, <4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<8 x i32> ) ret {<4 x i32>, <4 x i32>} %1 } -define {, } @fold_scalable_vector_deinterleav2() { -; CHECK-LABEL: define { , } @fold_scalable_vector_deinterleav2() { +define {, } @fold_scalable_vector_deinterleave2() { +; CHECK-LABEL: define { , } @fold_scalable_vector_deinterleave2() { ; CHECK-NEXT: ret { , } zeroinitializer ; %1 = call {, } @llvm.vector.deinterleave2.v4i32.v8i32( zeroinitializer)