Skip to content

Commit 0d5db4e

Browse files
authored
[AArch64][GlobalISel] Bitcast and Build Illegal G_CONCAT_VECTOR Instructions (#96492)
Attempts to handle illegal G_CONCAT_VECTORS instructions by bitcasting the sources into scalar values and using G_BUILD_VECTOR instead. Treating the G_CONCAT_VECTORS instruction in the legalization artefact by folding away concat(bitcast, ...) into buildvector(...) would require a check for the ImpDef created by the shuffles in LLVM.
1 parent 03d8f95 commit 0d5db4e

File tree

7 files changed

+432
-41
lines changed

7 files changed

+432
-41
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,8 @@ class LegalizerHelper {
373373
/// Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
374374
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
375375
LLT CastTy);
376+
/// Perform Bitcast legalize action on G_CONCAT_VECTORS.
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
377+
LLT CastTy);
376378

377379
LegalizeResult lowerConstant(MachineInstr &MI);
378380
LegalizeResult lowerFConstant(MachineInstr &MI);

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3421,6 +3421,54 @@ LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
34213421
return UnableToLegalize;
34223422
}
34233423

3424+
// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3425+
// those that have smaller than legal operands.
3426+
//
3427+
// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3428+
//
3429+
// ===>
3430+
//
3431+
// s32 = G_BITCAST <4 x s8>
3432+
// s32 = G_BITCAST <4 x s8>
3433+
// s32 = G_BITCAST <4 x s8>
3434+
// s32 = G_BITCAST <4 x s8>
3435+
// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3436+
// <16 x s8> = G_BITCAST <4 x s32>
3437+
LegalizerHelper::LegalizeResult
3438+
LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3439+
LLT CastTy) {
3440+
// Convert it to CONCAT instruction
3441+
auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3442+
if (!ConcatMI) {
3443+
return UnableToLegalize;
3444+
}
3445+
3446+
// Check if bitcast is Legal
3447+
auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3448+
LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3449+
3450+
// Check if the build vector is Legal
3451+
if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3452+
return UnableToLegalize;
3453+
}
3454+
3455+
// Bitcast the sources
3456+
SmallVector<Register> BitcastRegs;
3457+
for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3458+
BitcastRegs.push_back(
3459+
MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3460+
.getReg(0));
3461+
}
3462+
3463+
// Build the scalar values into a vector
3464+
Register BuildReg =
3465+
MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3466+
MIRBuilder.buildBitcast(DstReg, BuildReg);
3467+
3468+
MI.eraseFromParent();
3469+
return Legalized;
3470+
}
3471+
34243472
LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
34253473
// Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
34263474
Register DstReg = LoadMI.getDstReg();
@@ -3725,6 +3773,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
37253773
return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
37263774
case TargetOpcode::G_INSERT_VECTOR_ELT:
37273775
return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3776+
case TargetOpcode::G_CONCAT_VECTORS:
3777+
return bitcastConcatVector(MI, TypeIdx, CastTy);
37283778
default:
37293779
return UnableToLegalize;
37303780
}

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1006,7 +1006,21 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10061006
.clampNumElements(0, v2s64, v2s64);
10071007

10081008
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1009-
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
1009+
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
1010+
.bitcastIf(
1011+
[=](const LegalityQuery &Query) {
1012+
return Query.Types[0].getSizeInBits() <= 128 &&
1013+
Query.Types[1].getSizeInBits() <= 64;
1014+
},
1015+
[=](const LegalityQuery &Query) {
1016+
const LLT DstTy = Query.Types[0];
1017+
const LLT SrcTy = Query.Types[1];
1018+
return std::pair(
1019+
0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1020+
.changeElementCount(
1021+
DstTy.getElementCount().divideCoefficientBy(
1022+
SrcTy.getNumElements())));
1023+
});
10101024

10111025
getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
10121026

Lines changed: 93 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -global-isel-abort=1 -verify-machineinstrs -o - | FileCheck %s
2+
# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -global-isel-abort=2 -verify-machineinstrs -o - | FileCheck %s
33

44
---
55
name: legal_v4s32_v2s32
@@ -9,11 +9,12 @@ body: |
99
liveins: $d0, $d1
1010
; CHECK-LABEL: name: legal_v4s32_v2s32
1111
; CHECK: liveins: $d0, $d1
12-
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
13-
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
14-
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
15-
; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<4 x s32>)
16-
; CHECK: RET_ReallyLR
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
14+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
15+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
16+
; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<4 x s32>)
17+
; CHECK-NEXT: RET_ReallyLR
1718
%0:_(<2 x s32>) = COPY $d0
1819
%1:_(<2 x s32>) = COPY $d1
1920
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0(<2 x s32>), %1(<2 x s32>)
@@ -28,11 +29,12 @@ body: |
2829
liveins: $d0, $d1
2930
; CHECK-LABEL: name: legal_v8s16_v4s16
3031
; CHECK: liveins: $d0, $d1
31-
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
32-
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
33-
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>)
34-
; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
35-
; CHECK: RET_ReallyLR
32+
; CHECK-NEXT: {{ $}}
33+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
34+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
35+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>)
36+
; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
37+
; CHECK-NEXT: RET_ReallyLR
3638
%0:_(<4 x s16>) = COPY $d0
3739
%1:_(<4 x s16>) = COPY $d1
3840
%2:_(<8 x s16>) = G_CONCAT_VECTORS %0(<4 x s16>), %1(<4 x s16>)
@@ -47,14 +49,89 @@ body: |
4749
liveins: $q0
4850
; CHECK-LABEL: name: legal_v16s8_v8s8
4951
; CHECK: liveins: $q0
50-
; CHECK: %a:_(<8 x s8>) = G_IMPLICIT_DEF
51-
; CHECK: %b:_(<8 x s8>) = G_IMPLICIT_DEF
52-
; CHECK: %concat:_(<16 x s8>) = G_CONCAT_VECTORS %a(<8 x s8>), %b(<8 x s8>)
53-
; CHECK: $q0 = COPY %concat(<16 x s8>)
54-
; CHECK: RET_ReallyLR implicit $q0
52+
; CHECK-NEXT: {{ $}}
53+
; CHECK-NEXT: %a:_(<8 x s8>) = G_IMPLICIT_DEF
54+
; CHECK-NEXT: %b:_(<8 x s8>) = G_IMPLICIT_DEF
55+
; CHECK-NEXT: %concat:_(<16 x s8>) = G_CONCAT_VECTORS %a(<8 x s8>), %b(<8 x s8>)
56+
; CHECK-NEXT: $q0 = COPY %concat(<16 x s8>)
57+
; CHECK-NEXT: RET_ReallyLR implicit $q0
5558
%a:_(<8 x s8>) = G_IMPLICIT_DEF
5659
%b:_(<8 x s8>) = G_IMPLICIT_DEF
5760
%concat:_(<16 x s8>) = G_CONCAT_VECTORS %a:_(<8 x s8>), %b:_(<8 x s8>)
5861
$q0 = COPY %concat(<16 x s8>)
5962
RET_ReallyLR implicit $q0
6063
...
64+
---
65+
name: illegal_v16s8_v4s8
66+
tracksRegLiveness: true
67+
body: |
68+
bb.0:
69+
liveins: $x0
70+
71+
; CHECK-LABEL: name: illegal_v16s8_v4s8
72+
; CHECK: liveins: $x0
73+
; CHECK-NEXT: {{ $}}
74+
; CHECK-NEXT: %a:_(p0) = COPY $x0
75+
; CHECK-NEXT: %b:_(s32) = G_LOAD %a(p0) :: (load (s32))
76+
; CHECK-NEXT: %c:_(<4 x s8>) = G_BITCAST %b(s32)
77+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
78+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[DEF]](s16)
79+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[DEF]](s16)
80+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[DEF]](s16)
81+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[COPY]](s16), [[COPY1]](s16), [[COPY2]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
82+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s16>)
83+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[TRUNC]](<8 x s8>)
84+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST %c(<4 x s8>)
85+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<4 x s8>)
86+
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<4 x s8>)
87+
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<4 x s8>)
88+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[BITCAST1]](s32), [[BITCAST2]](s32), [[BITCAST3]](s32)
89+
; CHECK-NEXT: %f:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR1]](<4 x s32>)
90+
; CHECK-NEXT: $q0 = COPY %f(<16 x s8>)
91+
; CHECK-NEXT: RET_ReallyLR implicit $q0
92+
%a:_(p0) = COPY $x0
93+
%b:_(s32) = G_LOAD %a:_(p0) :: (load (s32))
94+
%c:_(<4 x s8>) = G_BITCAST %b:_(s32)
95+
%d:_(s8) = G_IMPLICIT_DEF
96+
%e:_(<4 x s8>) = G_BUILD_VECTOR %d:_(s8), %d:_(s8), %d:_(s8), %d:_(s8)
97+
%f:_(<16 x s8>) = G_CONCAT_VECTORS %c:_(<4 x s8>), %e:_(<4 x s8>), %e:_(<4 x s8>), %e:_(<4 x s8>)
98+
99+
$q0 = COPY %f(<16 x s8>)
100+
RET_ReallyLR implicit $q0
101+
...
102+
---
103+
name: illegal_v8s16_v2s16
104+
tracksRegLiveness: true
105+
body: |
106+
bb.0:
107+
liveins: $x0
108+
109+
; CHECK-LABEL: name: illegal_v8s16_v2s16
110+
; CHECK: liveins: $x0
111+
; CHECK-NEXT: {{ $}}
112+
; CHECK-NEXT: %a:_(p0) = COPY $x0
113+
; CHECK-NEXT: %b:_(s32) = G_LOAD %a(p0) :: (load (s32))
114+
; CHECK-NEXT: %c:_(<2 x s16>) = G_BITCAST %b(s32)
115+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
116+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
117+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
118+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
119+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[TRUNC]](<4 x s16>)
120+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST %c(<2 x s16>)
121+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
122+
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
123+
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
124+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[BITCAST1]](s32), [[BITCAST2]](s32), [[BITCAST3]](s32)
125+
; CHECK-NEXT: %f:_(<8 x s16>) = G_BITCAST [[BUILD_VECTOR1]](<4 x s32>)
126+
; CHECK-NEXT: $q0 = COPY %f(<8 x s16>)
127+
; CHECK-NEXT: RET_ReallyLR implicit $q0
128+
%a:_(p0) = COPY $x0
129+
%b:_(s32) = G_LOAD %a:_(p0) :: (load (s32))
130+
%c:_(<2 x s16>) = G_BITCAST %b:_(s32)
131+
%d:_(s16) = G_IMPLICIT_DEF
132+
%e:_(<2 x s16>) = G_BUILD_VECTOR %d:_(s16), %d:_(s16)
133+
%f:_(<8 x s16>) = G_CONCAT_VECTORS %c:_(<2 x s16>), %e:_(<2 x s16>), %e:_(<2 x s16>), %e:_(<2 x s16>)
134+
135+
$q0 = COPY %f(<8 x s16>)
136+
RET_ReallyLR implicit $q0
137+
...

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,8 @@
119119
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
120120
#
121121
# DEBUG-NEXT: G_CONCAT_VECTORS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
122-
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
123-
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
122+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
123+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
124124
#
125125
# DEBUG-NEXT: G_PTRTOINT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
126126
# DEBUG-NEXT: .. the first uncovered type index: 2, OK

0 commit comments

Comments
 (0)