Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit d88cc08

Browse files
author
Weiming Zhao
committed
[ARM] Fix Scavenger assert due to underestimated stack size
Summary: Currently, when checking if a stack is "BigStack" or not, it doesn't count into spills and arguments. Therefore, LLVM won't reserve spill slot for this actually "BigStack". This may cause scavenger failure. Reviewers: rengolin Subscribers: aemerson, rengolin, tberghammer, danalbert, srhines, llvm-commits Differential Revision: http://reviews.llvm.org/D19896 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268529 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent c8decc5 commit d88cc08

File tree

2 files changed

+211
-6
lines changed

2 files changed

+211
-6
lines changed

lib/Target/ARM/ARMFrameLowering.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
14791479
bool CS1Spilled = false;
14801480
bool LRSpilled = false;
14811481
unsigned NumGPRSpills = 0;
1482+
unsigned NumFPRSpills = 0;
14821483
SmallVector<unsigned, 4> UnspilledCS1GPRs;
14831484
SmallVector<unsigned, 4> UnspilledCS2GPRs;
14841485
const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
@@ -1533,8 +1534,17 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
15331534
CanEliminateFrame = false;
15341535
}
15351536

1536-
if (!ARM::GPRRegClass.contains(Reg))
1537+
if (!ARM::GPRRegClass.contains(Reg)) {
1538+
if (Spilled) {
1539+
if (ARM::SPRRegClass.contains(Reg))
1540+
NumFPRSpills++;
1541+
else if (ARM::DPRRegClass.contains(Reg))
1542+
NumFPRSpills += 2;
1543+
else if (ARM::QPRRegClass.contains(Reg))
1544+
NumFPRSpills += 4;
1545+
}
15371546
continue;
1547+
}
15381548

15391549
if (Spilled) {
15401550
NumGPRSpills++;
@@ -1607,11 +1617,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
16071617
// FIXME: We could add logic to be more precise about negative offsets
16081618
// and which instructions will need a scratch register for them. Is it
16091619
// worth the effort and added fragility?
1610-
bool BigStack = (RS && (MFI->estimateStackSize(MF) +
1611-
((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >=
1612-
estimateRSStackSizeLimit(MF, this))) ||
1613-
MFI->hasVarSizedObjects() ||
1614-
(MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
1620+
auto ArgStackSize = MF.getInfo<ARMFunctionInfo>()->getArgumentStackSize();
1621+
bool BigStack =
1622+
(RS && (MFI->estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills) +
1623+
(!hasFP(MF) ? ArgStackSize : 0) + 16 /* possible paddings */ +
1624+
((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >=
1625+
estimateRSStackSizeLimit(MF, this))) ||
1626+
MFI->hasVarSizedObjects() ||
1627+
(MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
16151628

16161629
bool ExtraCSSpill = false;
16171630
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
; RUN: llc < %s
; This test has around 4000 bytes of local variables and it also stresses register allocation
; to force a register scavenging. It tests if the stack is treated as "BigStack" and thus
; spill slots are reserved. If not, reg scavenger will assert.

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
target triple = "thumbv7--linux-android"

%struct.r = type { i32 (...)**, [10 x [9 x float]], [10 x [9 x float]], [101 x [9 x float]], [101 x [9 x float]], i32, i32, i32, i32, i32, [8 x [2 x i32]], [432 x float], [432 x i32], [10 x i8*], [10 x i8*], [10 x i32], [10 x i32], [10 x i32], [10 x i32], [10 x i32], [10 x i32], i32, i32, i32, i32, float, float, i32, i32, [9 x float], float*, float }

define void @foo(%struct.r* %this, float* %srcR, float* %srcC, float* %tempPntsX, float* %tY, float* %ms, float* %sX, float* %sY, i32* dereferenceable(4) %num, float* %tm, i32 %SR, i32 %lev, i8* %tdata, i32 %siW, i32 %pyw, i32 %pyh, i8* %sdata) #0 align 2 {
entry:
  ; Large fixed-size allocas (~4KB total) push the frame past the
  ; load/store immediate-offset range, making this a "BigStack" function.
  %sFV = alloca [49 x float], align 4
  %tFV = alloca [49 x float], align 4
  %TIM = alloca [9 x float], align 4
  %sort_tmp = alloca [432 x float], align 4
  %msDiffs = alloca [432 x float], align 4
  %TM.sroa.0.0.copyload = load float, float* %tm, align 4
  %TM.sroa.8.0.copyload = load float, float* null, align 4
  %TM.sroa.9.0..sroa_idx813 = getelementptr inbounds float, float* %tm, i32 6
  %TM.sroa.9.0.copyload = load float, float* %TM.sroa.9.0..sroa_idx813, align 4
  %TM.sroa.11.0.copyload = load float, float* undef, align 4
  br i1 undef, label %for.body.lr.ph, label %if.then343

for.body.lr.ph:                                   ; preds = %entry
  %arrayidx8 = getelementptr inbounds %struct.r, %struct.r* %this, i32 0, i32 1, i32 %lev, i32 0
  %arrayidx12 = getelementptr inbounds %struct.r, %struct.r* %this, i32 0, i32 1, i32 %lev, i32 6
  %arrayidx15 = getelementptr inbounds %struct.r, %struct.r* %this, i32 0, i32 1, i32 %lev, i32 4
  %arrayidx20 = getelementptr inbounds %struct.r, %struct.r* %this, i32 0, i32 1, i32 %lev, i32 7
  %arrayidx24 = getelementptr inbounds %struct.r, %struct.r* %this, i32 0, i32 2, i32 %lev, i32 0
  %arrayidx28 = getelementptr inbounds %struct.r, %struct.r* %this, i32 0, i32 2, i32 %lev, i32 6
  %arrayidx32 = getelementptr inbounds %struct.r, %struct.r* %this, i32 0, i32 2, i32 %lev, i32 4
  %arrayidx36 = getelementptr inbounds %struct.r, %struct.r* %this, i32 0, i32 2, i32 %lev, i32 7
  %arrayidx84 = getelementptr inbounds [9 x float], [9 x float]* %TIM, i32 0, i32 6
  %arrayidx92 = getelementptr inbounds [9 x float], [9 x float]* %TIM, i32 0, i32 7
  %add116 = add nsw i32 %pyh, 15
  br label %for.body

for.body:                                         ; preds = %for.cond.cleanup40, %for.body.lr.ph
  %arrayidx.phi = phi float* [ %sX, %for.body.lr.ph ], [ %arrayidx.inc, %for.cond.cleanup40 ]
  %arrayidx4.phi = phi float* [ %sY, %for.body.lr.ph ], [ %arrayidx4.inc, %for.cond.cleanup40 ]
  %0 = load float, float* %arrayidx.phi, align 4
  %1 = load float, float* %arrayidx4.phi, align 4
  %2 = load float, float* %arrayidx12, align 4
  %add = fadd fast float 0.000000e+00, %2
  %3 = load float, float* %arrayidx20, align 4
  %add21 = fadd fast float 0.000000e+00, %3
  %mul3.i = fmul fast float %add21, %TM.sroa.8.0.copyload
  %add.i = fadd fast float 0.000000e+00, %TM.sroa.11.0.copyload
  %add5.i = fadd fast float %add.i, %mul3.i
  %conv6.i = fdiv fast float 1.000000e+00, %add5.i
  %mul8.i = fmul fast float %add, %TM.sroa.0.0.copyload
  %add11.i = fadd fast float %mul8.i, %TM.sroa.9.0.copyload
  %add13.i = fadd fast float %add11.i, 0.000000e+00
  %4 = load float, float* %arrayidx24, align 4
  %mul14.i = fmul fast float %add13.i, %4
  %mul25 = fmul fast float %mul14.i, %conv6.i
  %add29 = fadd fast float %mul25, 0.000000e+00
  %arrayidx.inc = getelementptr float, float* %arrayidx.phi, i32 1
  %arrayidx4.inc = getelementptr float, float* %arrayidx4.phi, i32 1
  %conv64.1 = sitofp i32 undef to float
  %conv64.6 = sitofp i32 undef to float
  br label %for.body41

for.cond.cleanup40:                               ; preds = %for.body41
  %call = call fast float undef(%struct.r* nonnull %this, float* undef, i32 49)
  br label %for.body

for.body41:                                       ; preds = %for.cond.cleanup56.for.body41_crit_edge, %for.body
  ; Long chain of interleaved float/int live values keeps register
  ; pressure high enough that the scavenger needs an emergency slot.
  %5 = phi float [ 0.000000e+00, %for.body ], [ %.pre, %for.cond.cleanup56.for.body41_crit_edge ]
  %sFVData.0840 = phi float* [ undef, %for.body ], [ undef, %for.cond.cleanup56.for.body41_crit_edge ]
  %dx.0838 = phi i32 [ -3, %for.body ], [ undef, %for.cond.cleanup56.for.body41_crit_edge ]
  %conv42 = sitofp i32 %dx.0838 to float
  %add43 = fadd fast float %conv42, %add29
  %conv44 = fptosi float %add43 to i32
  %conv48 = sitofp i32 %conv44 to float
  %mul49 = fmul fast float %5, %conv48
  %add53 = fadd fast float %mul49, 0.000000e+00
  %conv111 = fptosi float undef to i32
  %cond = select i1 undef, i32 %conv111, i32 -16
  %cond.add116 = select i1 undef, i32 %cond, i32 %add116
  %cmp132 = icmp sgt i32 undef, -16
  %cond137 = select i1 %cmp132, i32 undef, i32 -16
  %cond153 = select i1 undef, i32 %cond137, i32 undef
  %add.ptr = getelementptr inbounds i8, i8* %sdata, i32 %cond153
  %mul154 = mul nsw i32 %cond.add116, %siW
  %add.ptr155 = getelementptr inbounds i8, i8* %add.ptr, i32 %mul154
  %6 = load i8, i8* %add.ptr155, align 1
  %conv157 = uitofp i8 %6 to float
  %incdec.ptr = getelementptr inbounds float, float* %sFVData.0840, i32 1
  store float %conv157, float* %sFVData.0840, align 4
  %7 = load float, float* %arrayidx15, align 4
  %mul65.1 = fmul fast float %7, %conv64.1
  %8 = load float, float* %arrayidx20, align 4
  %add69.1 = fadd fast float %mul65.1, %8
  %conv78.1 = fdiv fast float 1.000000e+00, 0.000000e+00
  %9 = load float, float* undef, align 4
  %mul80.1 = fmul fast float %9, %add53
  %10 = load float, float* undef, align 4
  %mul82.1 = fmul fast float %10, %add69.1
  %add83.1 = fadd fast float %mul82.1, %mul80.1
  %11 = load float, float* %arrayidx84, align 4
  %add85.1 = fadd fast float %add83.1, %11
  %mul86.1 = fmul fast float %add85.1, %conv78.1
  %12 = load float, float* %arrayidx92, align 4
  %add93.1 = fadd fast float 0.000000e+00, %12
  %mul94.1 = fmul fast float %add93.1, %conv78.1
  %13 = load float, float* %arrayidx24, align 4
  %mul98.1 = fmul fast float %mul86.1, %13
  %14 = load float, float* %arrayidx28, align 4
  %add102.1 = fadd fast float %mul98.1, %14
  %15 = load float, float* %arrayidx32, align 4
  %mul106.1 = fmul fast float %mul94.1, %15
  %16 = load float, float* %arrayidx36, align 4
  %add110.1 = fadd fast float %mul106.1, %16
  %conv111.1 = fptosi float %add102.1 to i32
  %conv112.1 = fptosi float %add110.1 to i32
  %cond.1 = select i1 undef, i32 %conv111.1, i32 -16
  %cond.add116.1 = select i1 undef, i32 %cond.1, i32 %add116
  %cond137.1 = select i1 undef, i32 %conv112.1, i32 -16
  %cond153.1 = select i1 undef, i32 %cond137.1, i32 undef
  %add.ptr.1 = getelementptr inbounds i8, i8* %sdata, i32 %cond153.1
  %mul154.1 = mul nsw i32 %cond.add116.1, %siW
  %add.ptr155.1 = getelementptr inbounds i8, i8* %add.ptr.1, i32 %mul154.1
  %17 = load i8, i8* %add.ptr155.1, align 1
  %conv157.1 = uitofp i8 %17 to float
  %incdec.ptr.1 = getelementptr inbounds float, float* %sFVData.0840, i32 2
  store float %conv157.1, float* %incdec.ptr, align 4
  %conv112.2 = fptosi float undef to i32
  %cond137.2 = select i1 undef, i32 %conv112.2, i32 -16
  %cond153.2 = select i1 undef, i32 %cond137.2, i32 undef
  %add.ptr.2 = getelementptr inbounds i8, i8* %sdata, i32 %cond153.2
  %add.ptr155.2 = getelementptr inbounds i8, i8* %add.ptr.2, i32 0
  %18 = load i8, i8* %add.ptr155.2, align 1
  %conv157.2 = uitofp i8 %18 to float
  %incdec.ptr.2 = getelementptr inbounds float, float* %sFVData.0840, i32 3
  store float %conv157.2, float* %incdec.ptr.1, align 4
  %cmp132.3 = icmp sgt i32 undef, -16
  %cond137.3 = select i1 %cmp132.3, i32 undef, i32 -16
  %cond153.3 = select i1 undef, i32 %cond137.3, i32 undef
  %add.ptr.3 = getelementptr inbounds i8, i8* %sdata, i32 %cond153.3
  %add.ptr155.3 = getelementptr inbounds i8, i8* %add.ptr.3, i32 0
  %19 = load i8, i8* %add.ptr155.3, align 1
  %conv157.3 = uitofp i8 %19 to float
  store float %conv157.3, float* %incdec.ptr.2, align 4
  %incdec.ptr.5 = getelementptr inbounds float, float* %sFVData.0840, i32 6
  %20 = load float, float* %arrayidx15, align 4
  %mul65.6 = fmul fast float %20, %conv64.6
  %21 = load float, float* %arrayidx20, align 4
  %add69.6 = fadd fast float %mul65.6, %21
  %conv78.6 = fdiv fast float 1.000000e+00, 0.000000e+00
  %22 = load float, float* undef, align 4
  %mul82.6 = fmul fast float %22, %add69.6
  %add83.6 = fadd fast float %mul82.6, 0.000000e+00
  %23 = load float, float* %arrayidx84, align 4
  %add85.6 = fadd fast float %add83.6, %23
  %mul86.6 = fmul fast float %add85.6, %conv78.6
  %24 = load float, float* %arrayidx24, align 4
  %mul98.6 = fmul fast float %mul86.6, %24
  %25 = load float, float* %arrayidx28, align 4
  %add102.6 = fadd fast float %mul98.6, %25
  %conv111.6 = fptosi float %add102.6 to i32
  %conv112.6 = fptosi float undef to i32
  %cond.6 = select i1 undef, i32 %conv111.6, i32 -16
  %cond.add116.6 = select i1 undef, i32 %cond.6, i32 %add116
  %cmp132.6 = icmp sgt i32 %conv112.6, -16
  %cond137.6 = select i1 %cmp132.6, i32 %conv112.6, i32 -16
  %cond153.6 = select i1 undef, i32 %cond137.6, i32 undef
  %add.ptr.6 = getelementptr inbounds i8, i8* %sdata, i32 %cond153.6
  %mul154.6 = mul nsw i32 %cond.add116.6, %siW
  %add.ptr155.6 = getelementptr inbounds i8, i8* %add.ptr.6, i32 %mul154.6
  %26 = load i8, i8* %add.ptr155.6, align 1
  %conv157.6 = uitofp i8 %26 to float
  store float %conv157.6, float* %incdec.ptr.5, align 4
  %exitcond874 = icmp eq i32 %dx.0838, 3
  br i1 %exitcond874, label %for.cond.cleanup40, label %for.cond.cleanup56.for.body41_crit_edge

for.cond.cleanup56.for.body41_crit_edge:          ; preds = %for.body41
  %.pre = load float, float* %arrayidx8, align 4
  br label %for.body41

if.then343:                                       ; preds = %entry
  ret void
}

attributes #0 = { sspstrong uwtable "no-frame-pointer-elim"="false" "target-cpu"="cortex-a7" }

!1 = !{!2, !2, i64 0}
!2 = !{!"float", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!3, !3, i64 0}

0 commit comments

Comments
 (0)