Skip to content

Commit a88f31d

Browse files
authored
[X86] Fix overflow with large stack probes on x86-64 (#113219)
When emitting an inline stack probe loop, we can't use SUBri to calculate the loop bound if the offset being subtracted from the stack pointer doesn't fit in a 32-bit (possibly sign-extended) immediate. Fixes #113218.
1 parent ebb27cc commit a88f31d

File tree

3 files changed

+198
-10
lines changed

3 files changed

+198
-10
lines changed

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -797,18 +797,40 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
797797
: Is64Bit ? X86::R11D
798798
: X86::EAX;
799799

800-
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
801-
.addReg(StackPtr)
802-
.setMIFlag(MachineInstr::FrameSetup);
803-
804800
// save loop bound
805801
{
806-
const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
807-
const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
808-
BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
809-
.addReg(FinalStackProbed)
810-
.addImm(BoundOffset)
811-
.setMIFlag(MachineInstr::FrameSetup);
802+
const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);
803+
804+
// Can we calculate the loop bound using SUB with a 32-bit immediate?
805+
// Note that the immediate gets sign-extended when used with a 64-bit
806+
// register, so in that case we only have 31 bits to work with.
807+
bool canUseSub =
808+
Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);
809+
810+
if (canUseSub) {
811+
const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
812+
813+
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
814+
.addReg(StackPtr)
815+
.setMIFlag(MachineInstr::FrameSetup);
816+
BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
817+
.addReg(FinalStackProbed)
818+
.addImm(BoundOffset)
819+
.setMIFlag(MachineInstr::FrameSetup);
820+
} else if (Uses64BitFramePtr) {
821+
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
822+
.addImm(-BoundOffset)
823+
.setMIFlag(MachineInstr::FrameSetup);
824+
BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
825+
.addReg(FinalStackProbed)
826+
.addReg(StackPtr)
827+
.setMIFlag(MachineInstr::FrameSetup);
828+
} else {
829+
// We're being asked to probe a stack frame that's 4 GiB or larger,
830+
// but our stack pointer is only 32 bits. This might be unreachable
831+
// code, so don't complain now; just trap if it's reached at runtime.
832+
BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP));
833+
}
812834

813835
// while in the loop, use loop-invariant reg for CFI,
814836
// instead of the stack pointer, which changes during the loop
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
2+
; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s
3+
; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s
4+
; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s
5+
6+
define i32 @foo() local_unnamed_addr #0 {
7+
; CHECK-X64-LABEL: foo:
8+
; CHECK-X64: # %bb.0:
9+
; CHECK-X64-NEXT: movabsq $-4799995904, %r11 # imm = 0xFFFFFFFEE1E5E000
10+
; CHECK-X64-NEXT: addq %rsp, %r11
11+
; CHECK-X64-NEXT: .cfi_def_cfa_register %r11
12+
; CHECK-X64-NEXT: .cfi_adjust_cfa_offset 4799995904
13+
; CHECK-X64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
14+
; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000
15+
; CHECK-X64-NEXT: movq $0, (%rsp)
16+
; CHECK-X64-NEXT: cmpq %r11, %rsp
17+
; CHECK-X64-NEXT: jne .LBB0_1
18+
; CHECK-X64-NEXT: # %bb.2:
19+
; CHECK-X64-NEXT: subq $3976, %rsp # imm = 0xF88
20+
; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp
21+
; CHECK-X64-NEXT: .cfi_def_cfa_offset 4799999888
22+
; CHECK-X64-NEXT: movl $1, 264(%rsp)
23+
; CHECK-X64-NEXT: movl $1, 28664(%rsp)
24+
; CHECK-X64-NEXT: movl -128(%rsp), %eax
25+
; CHECK-X64-NEXT: movabsq $4799999880, %rcx # imm = 0x11E1A2F88
26+
; CHECK-X64-NEXT: addq %rcx, %rsp
27+
; CHECK-X64-NEXT: .cfi_def_cfa_offset 8
28+
; CHECK-X64-NEXT: retq
29+
;
30+
; CHECK-X86-LABEL: foo:
31+
; CHECK-X86: # %bb.0:
32+
; CHECK-X86-NEXT: ud2
33+
; CHECK-X86-NEXT: .cfi_def_cfa_register %eax
34+
; CHECK-X86-NEXT: .cfi_adjust_cfa_offset 4800000000
35+
; CHECK-X86-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
36+
; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000
37+
; CHECK-X86-NEXT: movl $0, (%esp)
38+
; CHECK-X86-NEXT: cmpl %eax, %esp
39+
; CHECK-X86-NEXT: jne .LBB0_1
40+
; CHECK-X86-NEXT: # %bb.2:
41+
; CHECK-X86-NEXT: subl $12, %esp
42+
; CHECK-X86-NEXT: .cfi_def_cfa_register %esp
43+
; CHECK-X86-NEXT: .cfi_def_cfa_offset 4800000016
44+
; CHECK-X86-NEXT: movl $1, 392(%esp)
45+
; CHECK-X86-NEXT: movl $1, 28792(%esp)
46+
; CHECK-X86-NEXT: movl (%esp), %eax
47+
; CHECK-X86-NEXT: movl $4800000012, %ecx # imm = 0x11E1A300C
48+
; CHECK-X86-NEXT: addl %ecx, %esp
49+
; CHECK-X86-NEXT: .cfi_def_cfa_offset 4
50+
; CHECK-X86-NEXT: retl
51+
;
52+
; CHECK-X32-LABEL: foo:
53+
; CHECK-X32: # %bb.0:
54+
; CHECK-X32-NEXT: ud2
55+
; CHECK-X32-NEXT: .cfi_def_cfa_register %r11
56+
; CHECK-X32-NEXT: .cfi_adjust_cfa_offset 4799995904
57+
; CHECK-X32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
58+
; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000
59+
; CHECK-X32-NEXT: movq $0, (%esp)
60+
; CHECK-X32-NEXT: cmpl %r11d, %esp
61+
; CHECK-X32-NEXT: jne .LBB0_1
62+
; CHECK-X32-NEXT: # %bb.2:
63+
; CHECK-X32-NEXT: subl $3976, %esp # imm = 0xF88
64+
; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp
65+
; CHECK-X32-NEXT: .cfi_def_cfa_offset 4799999888
66+
; CHECK-X32-NEXT: movl $1, 264(%esp)
67+
; CHECK-X32-NEXT: movl $1, 28664(%esp)
68+
; CHECK-X32-NEXT: movl -128(%esp), %eax
69+
; CHECK-X32-NEXT: movabsq $4799999880, %rcx # imm = 0x11E1A2F88
70+
; CHECK-X32-NEXT: addq %rcx, %esp
71+
; CHECK-X32-NEXT: .cfi_def_cfa_offset 8
72+
; CHECK-X32-NEXT: retq
73+
%a = alloca i32, i64 1200000000, align 16
74+
%b0 = getelementptr inbounds i32, ptr %a, i64 98
75+
%b1 = getelementptr inbounds i32, ptr %a, i64 7198
76+
store volatile i32 1, ptr %b0
77+
store volatile i32 1, ptr %b1
78+
%c = load volatile i32, ptr %a
79+
ret i32 %c
80+
}
81+
82+
attributes #0 = {"probe-stack"="inline-asm"}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
2+
; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s
3+
; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s
4+
; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s
5+
6+
define i32 @foo() local_unnamed_addr #0 {
7+
; CHECK-X64-LABEL: foo:
8+
; CHECK-X64: # %bb.0:
9+
; CHECK-X64-NEXT: movabsq $-2399997952, %r11 # imm = 0xFFFFFFFF70F2F000
10+
; CHECK-X64-NEXT: addq %rsp, %r11
11+
; CHECK-X64-NEXT: .cfi_def_cfa_register %r11
12+
; CHECK-X64-NEXT: .cfi_adjust_cfa_offset 2399997952
13+
; CHECK-X64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
14+
; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000
15+
; CHECK-X64-NEXT: movq $0, (%rsp)
16+
; CHECK-X64-NEXT: cmpq %r11, %rsp
17+
; CHECK-X64-NEXT: jne .LBB0_1
18+
; CHECK-X64-NEXT: # %bb.2:
19+
; CHECK-X64-NEXT: subq $1928, %rsp # imm = 0x788
20+
; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp
21+
; CHECK-X64-NEXT: .cfi_def_cfa_offset 2399999888
22+
; CHECK-X64-NEXT: movl $1, 264(%rsp)
23+
; CHECK-X64-NEXT: movl $1, 28664(%rsp)
24+
; CHECK-X64-NEXT: movl -128(%rsp), %eax
25+
; CHECK-X64-NEXT: movl $2399999880, %ecx # imm = 0x8F0D1788
26+
; CHECK-X64-NEXT: addq %rcx, %rsp
27+
; CHECK-X64-NEXT: .cfi_def_cfa_offset 8
28+
; CHECK-X64-NEXT: retq
29+
;
30+
; CHECK-X86-LABEL: foo:
31+
; CHECK-X86: # %bb.0:
32+
; CHECK-X86-NEXT: movl %esp, %eax
33+
; CHECK-X86-NEXT: subl $2399997952, %eax # imm = 0x8F0D1000
34+
; CHECK-X86-NEXT: .cfi_def_cfa_register %eax
35+
; CHECK-X86-NEXT: .cfi_adjust_cfa_offset 2399997952
36+
; CHECK-X86-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
37+
; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000
38+
; CHECK-X86-NEXT: movl $0, (%esp)
39+
; CHECK-X86-NEXT: cmpl %eax, %esp
40+
; CHECK-X86-NEXT: jne .LBB0_1
41+
; CHECK-X86-NEXT: # %bb.2:
42+
; CHECK-X86-NEXT: subl $2060, %esp # imm = 0x80C
43+
; CHECK-X86-NEXT: .cfi_def_cfa_register %esp
44+
; CHECK-X86-NEXT: .cfi_def_cfa_offset 2400000016
45+
; CHECK-X86-NEXT: movl $1, 392(%esp)
46+
; CHECK-X86-NEXT: movl $1, 28792(%esp)
47+
; CHECK-X86-NEXT: movl (%esp), %eax
48+
; CHECK-X86-NEXT: movl $2400000012, %ecx # imm = 0x8F0D180C
49+
; CHECK-X86-NEXT: addl %ecx, %esp
50+
; CHECK-X86-NEXT: .cfi_def_cfa_offset 4
51+
; CHECK-X86-NEXT: retl
52+
;
53+
; CHECK-X32-LABEL: foo:
54+
; CHECK-X32: # %bb.0:
55+
; CHECK-X32-NEXT: movl %esp, %r11d
56+
; CHECK-X32-NEXT: subl $2399997952, %r11d # imm = 0x8F0D1000
57+
; CHECK-X32-NEXT: .cfi_def_cfa_register %r11
58+
; CHECK-X32-NEXT: .cfi_adjust_cfa_offset 2399997952
59+
; CHECK-X32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
60+
; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000
61+
; CHECK-X32-NEXT: movq $0, (%esp)
62+
; CHECK-X32-NEXT: cmpl %r11d, %esp
63+
; CHECK-X32-NEXT: jne .LBB0_1
64+
; CHECK-X32-NEXT: # %bb.2:
65+
; CHECK-X32-NEXT: subl $1928, %esp # imm = 0x788
66+
; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp
67+
; CHECK-X32-NEXT: .cfi_def_cfa_offset 2399999888
68+
; CHECK-X32-NEXT: movl $1, 264(%esp)
69+
; CHECK-X32-NEXT: movl $1, 28664(%esp)
70+
; CHECK-X32-NEXT: movl -128(%esp), %eax
71+
; CHECK-X32-NEXT: movl $2399999880, %ecx # imm = 0x8F0D1788
72+
; CHECK-X32-NEXT: addq %rcx, %esp
73+
; CHECK-X32-NEXT: .cfi_def_cfa_offset 8
74+
; CHECK-X32-NEXT: retq
75+
%a = alloca i32, i64 600000000, align 16
76+
%b0 = getelementptr inbounds i32, ptr %a, i64 98
77+
%b1 = getelementptr inbounds i32, ptr %a, i64 7198
78+
store volatile i32 1, ptr %b0
79+
store volatile i32 1, ptr %b1
80+
%c = load volatile i32, ptr %a
81+
ret i32 %c
82+
}
83+
84+
attributes #0 = {"probe-stack"="inline-asm"}

0 commit comments

Comments
 (0)