Skip to content

Commit 1931ed7

Browse files
author
Kai Luo
committed
[PowerPC][Peephole] Combine extsw and sldi after instruction selection
Summary: `extsw` and `sldi` are supposed to be combined if they are in the same BB in instruction selection phase. This patch handles the case where extsw and sldi are not in the same BB. Differential Revision: https://reviews.llvm.org/D63806 llvm-svn: 365430
1 parent 25ab27e commit 1931ed7

File tree

4 files changed

+216
-11
lines changed

4 files changed

+216
-11
lines changed

llvm/lib/Target/PowerPC/P9InstrResources.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
122122
(instrs
123123
(instregex "S(L|R)D$"),
124124
(instregex "SRAD(I)?$"),
125-
(instregex "EXTSWSLI$"),
125+
(instregex "EXTSWSLI_32_64$"),
126126
(instregex "MFV(S)?RD$"),
127127
(instregex "MTVSRD$"),
128128
(instregex "MTVSRW(A|Z)$"),
@@ -158,6 +158,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
158158
XSNEGDP,
159159
XSCPSGNDP,
160160
MFVSRWZ,
161+
EXTSWSLI,
161162
SRADI_32,
162163
RLDIC,
163164
RFEBB,
@@ -1101,8 +1102,9 @@ def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
11011102
// The two ops cannot be done in parallel.
11021103
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
11031104
(instrs
1104-
(instregex "EXTSWSLIo$"),
1105+
(instregex "EXTSWSLI_32_64o$"),
11051106
(instregex "SRAD(I)?o$"),
1107+
EXTSWSLIo,
11061108
SLDo,
11071109
SRDo,
11081110
RLDICo

llvm/lib/Target/PowerPC/PPCInstr64Bit.td

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -725,10 +725,17 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
725725
"sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
726726
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
727727

728-
defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH),
729-
"extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
730-
[(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
731-
isPPC64, Requires<[IsISA3_0]>;
728+
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
729+
defm EXTSWSLI_32_64 : XSForm_1r<31, 445, (outs g8rc:$rA),
730+
(ins gprc:$rS, u6imm:$SH),
731+
"extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
732+
[(set i64:$rA,
733+
(PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
734+
isPPC64, Requires<[IsISA3_0]>;
735+
736+
defm EXTSWSLI : XSForm_1rc<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
737+
"extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
738+
[]>, isPPC64, Requires<[IsISA3_0]>;
732739

733740
// For fast-isel:
734741
let isCodeGenOnly = 1, Defs = [CARRY] in

llvm/lib/Target/PowerPC/PPCMIPeephole.cpp

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ STATISTIC(NumFixedPointIterations,
5353
"to reg-imm ones");
5454
STATISTIC(NumRotatesCollapsed,
5555
"Number of pairs of rotate left, clear left/right collapsed");
56+
STATISTIC(NumEXTSWAndSLDICombined,
57+
"Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
5658

5759
static cl::opt<bool>
5860
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -101,6 +103,7 @@ struct PPCMIPeephole : public MachineFunctionPass {
101103
// Perform peepholes.
102104
bool eliminateRedundantCompare(void);
103105
bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
106+
bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
104107
bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
105108
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
106109
MachineInstr *MI);
@@ -799,7 +802,8 @@ bool PPCMIPeephole::simplifyCode(void) {
799802
break;
800803
}
801804
case PPC::RLDICR: {
802-
Simplified |= emitRLDICWhenLoweringJumpTables(MI);
805+
Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
806+
combineSEXTAndSHL(MI, ToErase);
803807
break;
804808
}
805809
}
@@ -1379,6 +1383,72 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
13791383
return true;
13801384
}
13811385

1386+
// For case in LLVM IR
1387+
// entry:
1388+
// %iconv = sext i32 %index to i64
1389+
// br i1 undef label %true, label %false
1390+
// true:
1391+
// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1392+
// ...
1393+
// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1394+
// different BBs when conducting instruction selection. We can do a peephole
1395+
// optimization to combine these two instructions into extswsli after
1396+
// instruction selection.
1397+
bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
1398+
MachineInstr *&ToErase) {
1399+
if (MI.getOpcode() != PPC::RLDICR)
1400+
return false;
1401+
1402+
if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
1403+
return false;
1404+
1405+
assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
1406+
1407+
MachineOperand MOpSHMI = MI.getOperand(2);
1408+
MachineOperand MOpMEMI = MI.getOperand(3);
1409+
if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
1410+
return false;
1411+
1412+
uint64_t SHMI = MOpSHMI.getImm();
1413+
uint64_t MEMI = MOpMEMI.getImm();
1414+
if (SHMI + MEMI != 63)
1415+
return false;
1416+
1417+
unsigned SrcReg = MI.getOperand(1).getReg();
1418+
if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
1419+
return false;
1420+
1421+
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1422+
if (SrcMI->getOpcode() != PPC::EXTSW &&
1423+
SrcMI->getOpcode() != PPC::EXTSW_32_64)
1424+
return false;
1425+
1426+
// If the register defined by extsw has more than one use, combination is not
1427+
// needed.
1428+
if (!MRI->hasOneNonDBGUse(SrcReg))
1429+
return false;
1430+
1431+
LLVM_DEBUG(dbgs() << "Combining pair: ");
1432+
LLVM_DEBUG(SrcMI->dump());
1433+
LLVM_DEBUG(MI.dump());
1434+
1435+
MachineInstr *NewInstr =
1436+
BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
1437+
SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
1438+
: TII->get(PPC::EXTSWSLI_32_64),
1439+
MI.getOperand(0).getReg())
1440+
.add(SrcMI->getOperand(1))
1441+
.add(MOpSHMI);
1442+
1443+
LLVM_DEBUG(dbgs() << "TO: ");
1444+
LLVM_DEBUG(NewInstr->dump());
1445+
++NumEXTSWAndSLDICombined;
1446+
ToErase = &MI;
1447+
// SrcMI, which is extsw, is of no use now, erase it.
1448+
SrcMI->eraseFromParent();
1449+
return true;
1450+
}
1451+
13821452
} // end default namespace
13831453

13841454
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,

llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll

Lines changed: 130 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ define dso_local i32 @poc(i32* %base, i32 %index, i1 %flag, i32 %default) {
4242
; CHECK-P9-NEXT: andi. r5, r5, 1
4343
; CHECK-P9-NEXT: bc 4, gt, .LBB0_2
4444
; CHECK-P9-NEXT: # %bb.1: # %true
45-
; CHECK-P9-NEXT: extsw r4, r4
46-
; CHECK-P9-NEXT: sldi r4, r4, 2
45+
; CHECK-P9-NEXT: extswsli r4, r4, 2
4746
; CHECK-P9-NEXT: lwzx r3, r3, r4
4847
; CHECK-P9-NEXT: blr
4948
; CHECK-P9-NEXT: .LBB0_2: # %false
@@ -55,8 +54,7 @@ define dso_local i32 @poc(i32* %base, i32 %index, i1 %flag, i32 %default) {
5554
; CHECK-P9-BE-NEXT: andi. r5, r5, 1
5655
; CHECK-P9-BE-NEXT: bc 4, gt, .LBB0_2
5756
; CHECK-P9-BE-NEXT: # %bb.1: # %true
58-
; CHECK-P9-BE-NEXT: extsw r4, r4
59-
; CHECK-P9-BE-NEXT: sldi r4, r4, 2
57+
; CHECK-P9-BE-NEXT: extswsli r4, r4, 2
6058
; CHECK-P9-BE-NEXT: lwzx r3, r3, r4
6159
; CHECK-P9-BE-NEXT: blr
6260
; CHECK-P9-BE-NEXT: .LBB0_2: # %false
@@ -74,3 +72,131 @@ true:
7472
false:
7573
ret i32 %default
7674
}
75+
76+
define dso_local i64 @poc_i64(i64* %base, i32 %index, i1 %flag, i64 %default) {
77+
; CHECK-LABEL: poc_i64:
78+
; CHECK: # %bb.0: # %entry
79+
; CHECK-NEXT: andi. r5, r5, 1
80+
; CHECK-NEXT: bc 4, gt, .LBB1_2
81+
; CHECK-NEXT: # %bb.1: # %true
82+
; CHECK-NEXT: extsw r4, r4
83+
; CHECK-NEXT: sldi r4, r4, 3
84+
; CHECK-NEXT: ldx r3, r3, r4
85+
; CHECK-NEXT: blr
86+
; CHECK-NEXT: .LBB1_2: # %false
87+
; CHECK-NEXT: mr r3, r6
88+
; CHECK-NEXT: blr
89+
;
90+
; CHECK-BE-LABEL: poc_i64:
91+
; CHECK-BE: # %bb.0: # %entry
92+
; CHECK-BE-NEXT: andi. r5, r5, 1
93+
; CHECK-BE-NEXT: bc 4, gt, .LBB1_2
94+
; CHECK-BE-NEXT: # %bb.1: # %true
95+
; CHECK-BE-NEXT: extsw r4, r4
96+
; CHECK-BE-NEXT: sldi r4, r4, 3
97+
; CHECK-BE-NEXT: ldx r3, r3, r4
98+
; CHECK-BE-NEXT: blr
99+
; CHECK-BE-NEXT: .LBB1_2: # %false
100+
; CHECK-BE-NEXT: mr r3, r6
101+
; CHECK-BE-NEXT: blr
102+
;
103+
; CHECK-P9-LABEL: poc_i64:
104+
; CHECK-P9: # %bb.0: # %entry
105+
; CHECK-P9-NEXT: andi. r5, r5, 1
106+
; CHECK-P9-NEXT: bc 4, gt, .LBB1_2
107+
; CHECK-P9-NEXT: # %bb.1: # %true
108+
; CHECK-P9-NEXT: extswsli r4, r4, 3
109+
; CHECK-P9-NEXT: ldx r3, r3, r4
110+
; CHECK-P9-NEXT: blr
111+
; CHECK-P9-NEXT: .LBB1_2: # %false
112+
; CHECK-P9-NEXT: mr r3, r6
113+
; CHECK-P9-NEXT: blr
114+
;
115+
; CHECK-P9-BE-LABEL: poc_i64:
116+
; CHECK-P9-BE: # %bb.0: # %entry
117+
; CHECK-P9-BE-NEXT: andi. r5, r5, 1
118+
; CHECK-P9-BE-NEXT: bc 4, gt, .LBB1_2
119+
; CHECK-P9-BE-NEXT: # %bb.1: # %true
120+
; CHECK-P9-BE-NEXT: extswsli r4, r4, 3
121+
; CHECK-P9-BE-NEXT: ldx r3, r3, r4
122+
; CHECK-P9-BE-NEXT: blr
123+
; CHECK-P9-BE-NEXT: .LBB1_2: # %false
124+
; CHECK-P9-BE-NEXT: mr r3, r6
125+
; CHECK-P9-BE-NEXT: blr
126+
entry:
127+
%iconv = sext i32 %index to i64
128+
br i1 %flag, label %true, label %false
129+
130+
true:
131+
%ptr = getelementptr inbounds i64, i64* %base, i64 %iconv
132+
%value = load i64, i64* %ptr, align 8
133+
ret i64 %value
134+
135+
false:
136+
ret i64 %default
137+
}
138+
139+
define dso_local i64 @no_extswsli(i64* %base, i32 %index, i1 %flag) {
140+
; CHECK-LABEL: no_extswsli:
141+
; CHECK: # %bb.0: # %entry
142+
; CHECK-NEXT: andi. r5, r5, 1
143+
; CHECK-NEXT: extsw r4, r4
144+
; CHECK-NEXT: bc 4, gt, .LBB2_2
145+
; CHECK-NEXT: # %bb.1: # %true
146+
; CHECK-NEXT: sldi r4, r4, 3
147+
; CHECK-NEXT: ldx r3, r3, r4
148+
; CHECK-NEXT: blr
149+
; CHECK-NEXT: .LBB2_2: # %false
150+
; CHECK-NEXT: mr r3, r4
151+
; CHECK-NEXT: blr
152+
;
153+
; CHECK-BE-LABEL: no_extswsli:
154+
; CHECK-BE: # %bb.0: # %entry
155+
; CHECK-BE-NEXT: andi. r5, r5, 1
156+
; CHECK-BE-NEXT: extsw r4, r4
157+
; CHECK-BE-NEXT: bc 4, gt, .LBB2_2
158+
; CHECK-BE-NEXT: # %bb.1: # %true
159+
; CHECK-BE-NEXT: sldi r4, r4, 3
160+
; CHECK-BE-NEXT: ldx r3, r3, r4
161+
; CHECK-BE-NEXT: blr
162+
; CHECK-BE-NEXT: .LBB2_2: # %false
163+
; CHECK-BE-NEXT: mr r3, r4
164+
; CHECK-BE-NEXT: blr
165+
;
166+
; CHECK-P9-LABEL: no_extswsli:
167+
; CHECK-P9: # %bb.0: # %entry
168+
; CHECK-P9-NEXT: extsw r4, r4
169+
; CHECK-P9-NEXT: andi. r5, r5, 1
170+
; CHECK-P9-NEXT: bc 4, gt, .LBB2_2
171+
; CHECK-P9-NEXT: # %bb.1: # %true
172+
; CHECK-P9-NEXT: sldi r4, r4, 3
173+
; CHECK-P9-NEXT: ldx r3, r3, r4
174+
; CHECK-P9-NEXT: blr
175+
; CHECK-P9-NEXT: .LBB2_2: # %false
176+
; CHECK-P9-NEXT: mr r3, r4
177+
; CHECK-P9-NEXT: blr
178+
;
179+
; CHECK-P9-BE-LABEL: no_extswsli:
180+
; CHECK-P9-BE: # %bb.0: # %entry
181+
; CHECK-P9-BE-NEXT: extsw r4, r4
182+
; CHECK-P9-BE-NEXT: andi. r5, r5, 1
183+
; CHECK-P9-BE-NEXT: bc 4, gt, .LBB2_2
184+
; CHECK-P9-BE-NEXT: # %bb.1: # %true
185+
; CHECK-P9-BE-NEXT: sldi r4, r4, 3
186+
; CHECK-P9-BE-NEXT: ldx r3, r3, r4
187+
; CHECK-P9-BE-NEXT: blr
188+
; CHECK-P9-BE-NEXT: .LBB2_2: # %false
189+
; CHECK-P9-BE-NEXT: mr r3, r4
190+
; CHECK-P9-BE-NEXT: blr
191+
entry:
192+
%iconv = sext i32 %index to i64
193+
br i1 %flag, label %true, label %false
194+
195+
true:
196+
%ptr = getelementptr inbounds i64, i64* %base, i64 %iconv
197+
%value = load i64, i64* %ptr, align 8
198+
ret i64 %value
199+
200+
false:
201+
ret i64 %iconv
202+
}

0 commit comments

Comments
 (0)