Skip to content

[SandboxVec] Add a simple pack reuse pass #141848

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//===- PackReuse.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A pack de-duplication pass.
//

#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H
#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H

#include "llvm/ADT/StringRef.h"
#include "llvm/SandboxIR/Pass.h"
#include "llvm/SandboxIR/Region.h"

namespace llvm::sandboxir {

/// A region pass that de-duplicates packs: whenever an equivalent pack pattern
/// already exists, it tries to reuse that one instead of keeping both around.
/// Future passes will most probably optimize the duplicates away anyway, but
/// until then their added cost can make vectorization more conservative than
/// it should be.
class PackReuse final : public RegionPass {
public:
  /// Registers the pass under the name "pack-reuse".
  PackReuse() : RegionPass("pack-reuse") {}

  /// Entry point of the pass for region \p Rgn.
  bool runOnRegion(Region &Rgn, const Analyses &A) final;

private:
  /// Flag that `runOnRegion()` reports back to its caller.
  bool Change = false;
};

} // namespace llvm::sandboxir

#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,25 @@
#include "llvm/SandboxIR/Type.h"
#include "llvm/SandboxIR/Utils.h"

namespace llvm::sandboxir {
namespace llvm {
/// Traits for DenseMap.
template <> struct DenseMapInfo<SmallVector<sandboxir::Value *>> {
static inline SmallVector<sandboxir::Value *> getEmptyKey() {
return SmallVector<sandboxir::Value *>({(sandboxir::Value *)-1});
}
static inline SmallVector<sandboxir::Value *> getTombstoneKey() {
return SmallVector<sandboxir::Value *>({(sandboxir::Value *)-2});
}
static unsigned getHashValue(const SmallVector<sandboxir::Value *> &Vec) {
return hash_combine_range(Vec.begin(), Vec.end());
}
static bool isEqual(const SmallVector<sandboxir::Value *> &Vec1,
const SmallVector<sandboxir::Value *> &Vec2) {
return Vec1 == Vec2;
}
};

namespace sandboxir {

class VecUtils {
public:
Expand Down Expand Up @@ -179,13 +197,79 @@ class VecUtils {
/// \Returns the first integer power of 2 that is <= Num.
static unsigned getFloorPowerOf2(unsigned Num);

/// Helper struct for `matchPack()`. Describes the instructions and operands
/// of a pack pattern.
struct PackPattern {
/// The insertelement instructions that form the pack pattern in bottom-up
/// order, i.e., the first instruction in `Instrs` is the bottom-most
/// InsertElement instruction of the pack pattern.
/// For example in this simple pack pattern:
/// %Pack0 = insertelement <2 x i8> poison, i8 %v0, i64 0
/// %Pack1 = insertelement <2 x i8> %Pack0, i8 %v1, i64 1
/// this is [ %Pack1, %Pack0 ].
SmallVector<Instruction *> Instrs;
/// The "external" operands of the pack pattern, i.e., the values that get
/// packed into a vector, skipping the ones in `Instrs`.
/// As populated by `matchPack()`, the operands are indexed by lane: element
/// `N` is the value inserted into lane `N`. Note that this is the opposite
/// direction of `Instrs`, which is bottom-up.
/// So in our example this would be [ %v0, %v1 ].
SmallVector<Value *> Operands;
};

/// If \p I is the last instruction of a pack pattern (i.e., an InsertElement
/// into a vector), then this function returns the instructions in the pack
/// and the operands in the pack, else returns nullopt.
/// Here is an example of a matched pattern:
///   %PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0
///   %PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1
/// A pattern is matched only if, walking up the use-def chain from \p I:
///  - there is one InsertElement per lane of the vector produced by \p I,
///  - all of them are in the same block as \p I,
///  - their lane operands are constants that count down from the last lane
///    to lane 0, and
///  - the vector operand of the topmost insert (lane 0) is poison.
/// Vectors with fewer than 2 lanes are never matched.
/// TODO: this currently detects only simple canonicalized patterns.
static std::optional<PackPattern> matchPack(Instruction *I) {
  // TODO: Support vector pack patterns.
  // TODO: Support out-of-order inserts.

  // Early return if `I` is not an Insert.
  if (!isa<InsertElementInst>(I))
    return std::nullopt;
  auto *BB0 = I->getParent();
  // The pack contains as many instrs as the lanes of the bottom-most Insert.
  // NOTE(review): this relies on VecUtils::getNumLanes() accepting the type
  // of `I`; confirm how it behaves for scalable vectors.
  unsigned ExpectedNumInserts = VecUtils::getNumLanes(I);
  // An insert into a single-lane vector (e.g., <1 x i8>) is valid IR, so it
  // must not trip an assertion. It is simply not a pack pattern.
  if (ExpectedNumInserts < 2)
    return std::nullopt;
  PackPattern Pack;
  Pack.Operands.resize(ExpectedNumInserts);
  // Collect the inserts by walking up the use-def chain.
  Instruction *InsertI = I;
  for (auto ExpectedLane : reverse(seq<unsigned>(ExpectedNumInserts))) {
    // The previous step found something other than an InsertElement.
    if (InsertI == nullptr)
      return std::nullopt;
    if (InsertI->getParent() != BB0)
      return std::nullopt;
    // Check the lane: it must be the constant we expect at this position.
    auto *LaneC = dyn_cast<ConstantInt>(InsertI->getOperand(2));
    if (LaneC == nullptr || LaneC->getSExtValue() != ExpectedLane)
      return std::nullopt;
    Pack.Instrs.push_back(InsertI);
    // Operands are stored by lane, while `Instrs` is filled bottom-up.
    Pack.Operands[ExpectedLane] = InsertI->getOperand(1);

    Value *Op = InsertI->getOperand(0);
    if (ExpectedLane == 0) {
      // Check the topmost insert. The operand should be a Poison.
      if (!isa<PoisonValue>(Op))
        return std::nullopt;
    } else {
      // Move one step up the chain; null if `Op` is not an InsertElement.
      InsertI = dyn_cast<InsertElementInst>(Op);
    }
  }
  return Pack;
}

#ifndef NDEBUG
/// Helper dump function for debugging.
LLVM_DUMP_METHOD static void dump(ArrayRef<Value *> Bndl);
LLVM_DUMP_METHOD static void dump(ArrayRef<Instruction *> Bndl);
#endif // NDEBUG
};

} // namespace llvm::sandboxir
} // namespace sandboxir

} // namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ add_llvm_component_library(LLVMVectorize
SandboxVectorizer/Interval.cpp
SandboxVectorizer/Legality.cpp
SandboxVectorizer/Passes/BottomUpVec.cpp
SandboxVectorizer/Passes/PackReuse.cpp
SandboxVectorizer/Passes/RegionsFromBBs.cpp
SandboxVectorizer/Passes/RegionsFromMetadata.cpp
SandboxVectorizer/Passes/SeedCollection.cpp
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//===- PackReuse.cpp - A pack de-duplication pass -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"

namespace llvm::sandboxir {

/// Looks for groups of identical pack patterns in \p Rgn, i.e., packs with the
/// same operands in the same lanes whose bottom-most insert is in the same
/// basic block, and makes the users of every duplicate use the earliest pack
/// of its group instead. The instructions of a duplicate are erased once they
/// have no users left.
/// \Returns true if at least one duplicate pack was replaced by this call.
bool PackReuse::runOnRegion(Region &Rgn, const Analyses &A) {
  // `Change` is a member, so reset it here. Otherwise, if this object is run
  // on more than one region, a change made in an earlier run would be
  // reported again by a later run that did not modify anything.
  Change = false;
  if (Rgn.empty())
    return Change;
  // The key to the map is the block and the ordered operands of the pack.
  // The value is a vector of all Pack Instrs with the same operands.
  DenseMap<std::pair<BasicBlock *, SmallVector<Value *>>,
           SmallVector<SmallVector<Instruction *>>>
      PacksMap;
  // Go over the region and look for pack patterns.
  for (auto *I : Rgn) {
    auto PackOpt = VecUtils::matchPack(I);
    if (!PackOpt)
      continue;
    // TODO: For now limit pack reuse within a BB.
    BasicBlock *BB = PackOpt->Instrs.front()->getParent();
    // The matched pattern is not needed afterwards, so move its vectors into
    // the map instead of copying them.
    PacksMap[{BB, std::move(PackOpt->Operands)}].push_back(
        std::move(PackOpt->Instrs));
  }
  for (auto &Pair : PacksMap) {
    auto &Packs = Pair.second;
    if (Packs.size() <= 1)
      continue;
    // Sort packs by program order of their bottom-most insert. All packs of
    // a group share a block, so `comesBefore()` is well defined.
    sort(Packs, [](const auto &PackInstrs1, const auto &PackInstrs2) {
      return PackInstrs1.front()->comesBefore(PackInstrs2.front());
    });
    Instruction *TopMostPack = Packs[0].front();
    // Replace duplicate packs with the first one.
    for (const auto &PackInstrs : drop_begin(Packs)) {
      PackInstrs.front()->replaceAllUsesWith(TopMostPack);
      // Delete the pack instrs bottom-up. The bottom-most one is dead after
      // the replacement above, but an insert further up may still have users
      // outside this pack (it may even be shared with the pack we keep).
      // Stop at the first such insert: it and everything above it must stay.
      for (auto *PackI : PackInstrs) {
        if (PackI->hasNUsesOrMore(1))
          break;
        PackI->eraseFromParent();
      }
    }
    Change = true;
  }
  return Change;
}

} // namespace llvm::sandboxir
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#endif

REGION_PASS("null", ::llvm::sandboxir::NullPass)
REGION_PASS("pack-reuse", ::llvm::sandboxir::PackReuse)
REGION_PASS("print-instruction-count", ::llvm::sandboxir::PrintInstructionCount)
REGION_PASS("print-region", ::llvm::sandboxir::PrintRegion)
REGION_PASS("tr-save", ::llvm::sandboxir::TransactionSave)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintRegion.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromBBs.h"
Expand Down
71 changes: 71 additions & 0 deletions llvm/test/Transforms/SandboxVectorizer/pack_reuse_basic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-passes="regions-from-metadata<pack-reuse>" %s -S | FileCheck %s

; Three two-lane packs in a single block: PackA and PackB insert %v0 into
; lane 0 and %v1 into lane 1, while PackC inserts them the other way around.
define void @pack_reuse(i8 %v0, i8 %v1, ptr %ptr) {
; CHECK-LABEL: define void @pack_reuse(
; CHECK-SAME: i8 [[V0:%.*]], i8 [[V1:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PACKA0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META0:![0-9]+]]
; CHECK-NEXT: [[PACKA1:%.*]] = insertelement <2 x i8> [[PACKA0]], i8 [[V1]], i64 1, !sandboxvec [[META0]]
; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META0]]
; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META0]]
; CHECK-NEXT: [[PACKC0:%.*]] = insertelement <2 x i8> poison, i8 [[V1]], i64 0, !sandboxvec [[META0]]
; CHECK-NEXT: [[PACKC1:%.*]] = insertelement <2 x i8> [[PACKC0]], i8 [[V0]], i64 1, !sandboxvec [[META0]]
; CHECK-NEXT: store <2 x i8> [[PACKC1]], ptr [[PTR]], align 2, !sandboxvec [[META0]]
; CHECK-NEXT: ret void
;
%PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
%PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1, !sandboxvec !0
store <2 x i8> %PackA1, ptr %ptr, !sandboxvec !0

; PackB packs the same operands into the same lanes as PackA, so its store
; should reuse PackA1 and the PackB inserts should be removed.
%PackB0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
%PackB1 = insertelement <2 x i8> %PackB0, i8 %v1, i64 1, !sandboxvec !0
store <2 x i8> %PackB1, ptr %ptr, !sandboxvec !0

; PackC uses the same operands in swapped lanes, so it is not a duplicate and
; should remain.
%PackC0 = insertelement <2 x i8> poison, i8 %v1, i64 0, !sandboxvec !0
%PackC1 = insertelement <2 x i8> %PackC0, i8 %v0, i64 1, !sandboxvec !0
store <2 x i8> %PackC1, ptr %ptr, !sandboxvec !0
ret void
}

; TODO: For now we don't support reusing packs from earlier BBs.
; PackA (in entry) and PackB (in bb) pack the same operands into the same
; lanes but live in different blocks, so all three packs are expected to stay.
define void @pack_cross_bb(i8 %v0, i8 %v1, ptr %ptr) {
; CHECK-LABEL: define void @pack_cross_bb(
; CHECK-SAME: i8 [[V0:%.*]], i8 [[V1:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PACKA0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META1:![0-9]+]]
; CHECK-NEXT: [[PACKA1:%.*]] = insertelement <2 x i8> [[PACKA0]], i8 [[V1]], i64 1, !sandboxvec [[META1]]
; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META1]]
; CHECK-NEXT: br label %[[BB:.*]]
; CHECK: [[BB]]:
; CHECK-NEXT: [[PACKB0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META1]]
; CHECK-NEXT: [[PACKB1:%.*]] = insertelement <2 x i8> [[PACKB0]], i8 [[V1]], i64 1, !sandboxvec [[META1]]
; CHECK-NEXT: store <2 x i8> [[PACKB1]], ptr [[PTR]], align 2, !sandboxvec [[META1]]
; CHECK-NEXT: [[PACKC0:%.*]] = insertelement <2 x i8> poison, i8 [[V1]], i64 0, !sandboxvec [[META1]]
; CHECK-NEXT: [[PACKC1:%.*]] = insertelement <2 x i8> [[PACKC0]], i8 [[V0]], i64 1, !sandboxvec [[META1]]
; CHECK-NEXT: store <2 x i8> [[PACKC1]], ptr [[PTR]], align 2, !sandboxvec [[META1]]
; CHECK-NEXT: ret void
;
entry:
%PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
%PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1, !sandboxvec !0
store <2 x i8> %PackA1, ptr %ptr, !sandboxvec !0
br label %bb

bb:
; Same operands and lanes as PackA, but in a different block than PackA.
%PackB0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
%PackB1 = insertelement <2 x i8> %PackB0, i8 %v1, i64 1, !sandboxvec !0
store <2 x i8> %PackB1, ptr %ptr, !sandboxvec !0

; Same operands as PackB but in swapped lanes, so not a duplicate of it.
%PackC0 = insertelement <2 x i8> poison, i8 %v1, i64 0, !sandboxvec !0
%PackC1 = insertelement <2 x i8> %PackC0, i8 %v0, i64 1, !sandboxvec !0
store <2 x i8> %PackC1, ptr %ptr, !sandboxvec !0
ret void
}

!0 = distinct !{!"sandboxregion"}
;.
; CHECK: [[META0]] = distinct !{!"sandboxregion"}
; CHECK: [[META1]] = distinct !{!"sandboxregion"}
;.
45 changes: 45 additions & 0 deletions llvm/test/Transforms/SandboxVectorizer/pack_reuse_end_to_end.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection<tr-save,bottom-up-vec,tr-accept>" %s -S | FileCheck %s --check-prefix NOREUSE
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection<tr-save,bottom-up-vec,pack-reuse,tr-accept>" %s -S | FileCheck %s --check-prefix PKREUSE

; End-to-end test: each scalar fsub below uses the same load for both of its
; operands, so the vectorized fsub needs the pack of (%ldX, %ldY) twice.
; The first run line (no pack-reuse in the pipeline) expects that pack to be
; emitted twice; the second run line (with pack-reuse) expects a single pack
; that feeds both operands of the vector fsub.
define void @pack_reuse(ptr %ptr, ptr %ptrX, ptr %ptrY) {
; NOREUSE-LABEL: define void @pack_reuse(
; NOREUSE-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]], ptr [[PTRY:%.*]]) {
; NOREUSE-NEXT: [[LDX:%.*]] = load float, ptr [[PTRX]], align 4
; NOREUSE-NEXT: [[LDY:%.*]] = load float, ptr [[PTRY]], align 4
; NOREUSE-NEXT: [[PACK2:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0:![0-9]+]]
; NOREUSE-NEXT: [[PACK3:%.*]] = insertelement <2 x float> [[PACK2]], float [[LDY]], i32 1, !sandboxvec [[META0]]
; NOREUSE-NEXT: [[PACK:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0]]
; NOREUSE-NEXT: [[PACK1:%.*]] = insertelement <2 x float> [[PACK]], float [[LDY]], i32 1, !sandboxvec [[META0]]
; NOREUSE-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; NOREUSE-NEXT: [[VEC:%.*]] = fsub <2 x float> [[PACK1]], [[PACK3]], !sandboxvec [[META0]]
; NOREUSE-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4, !sandboxvec [[META0]]
; NOREUSE-NEXT: ret void
;
; PKREUSE-LABEL: define void @pack_reuse(
; PKREUSE-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]], ptr [[PTRY:%.*]]) {
; PKREUSE-NEXT: [[LDX:%.*]] = load float, ptr [[PTRX]], align 4
; PKREUSE-NEXT: [[LDY:%.*]] = load float, ptr [[PTRY]], align 4
; PKREUSE-NEXT: [[PACK2:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0:![0-9]+]]
; PKREUSE-NEXT: [[PACK3:%.*]] = insertelement <2 x float> [[PACK2]], float [[LDY]], i32 1, !sandboxvec [[META0]]
; PKREUSE-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; PKREUSE-NEXT: [[VEC:%.*]] = fsub <2 x float> [[PACK3]], [[PACK3]], !sandboxvec [[META0]]
; PKREUSE-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4, !sandboxvec [[META0]]
; PKREUSE-NEXT: ret void
;
; The two loads are not at consecutive addresses of one base pointer, so the
; expected output above keeps them scalar and packs them with inserts.
%ldX = load float, ptr %ptrX
%ldY = load float, ptr %ptrY

%ptr0 = getelementptr float, ptr %ptr, i32 0
%ptr1 = getelementptr float, ptr %ptr, i32 1
; Both operands of each fsub are the same loaded value.
%sub0 = fsub float %ldX, %ldX
%sub1 = fsub float %ldY, %ldY
; Stores to consecutive elements of %ptr; the expected output has a single
; <2 x float> store in their place.
store float %sub0, ptr %ptr0
store float %sub1, ptr %ptr1
ret void
}
;.
; NOREUSE: [[META0]] = distinct !{!"sandboxregion"}
;.
; PKREUSE: [[META0]] = distinct !{!"sandboxregion"}
;.
Loading
Loading