From f3af86010b3a084c35a11d347bfca500743209e5 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Tue, 10 Jun 2025 00:59:09 +0200 Subject: [PATCH 1/4] add a fixme to use `extern_custom` when available --- .../compiler-builtins/src/probestack.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/library/compiler-builtins/compiler-builtins/src/probestack.rs b/library/compiler-builtins/compiler-builtins/src/probestack.rs index c9070cf55c64a..16faaa67f8adb 100644 --- a/library/compiler-builtins/compiler-builtins/src/probestack.rs +++ b/library/compiler-builtins/compiler-builtins/src/probestack.rs @@ -125,6 +125,9 @@ macro_rules! define_rust_probestack { // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // +// FIXME(abi_custom): This function is unsafe because it uses a custom ABI, +// it does not actually match `extern "C"`. +// // The ABI here is that the stack frame size is located in `%rax`. Upon // return we're not supposed to modify `%rsp` or `%rax`. // @@ -260,6 +263,9 @@ core::arch::global_asm!( // that on Unix we're expected to restore everything as it was, this // function basically can't tamper with anything. // +// FIXME(abi_custom): This function is unsafe because it uses a custom ABI, +// it does not actually match `extern "C"`. +// // The ABI here is the same as x86_64, except everything is 32-bits large. core::arch::global_asm!( define_rust_probestack!( @@ -303,6 +309,9 @@ core::arch::global_asm!( // probestack function will also do things like _chkstk in MSVC. // So we need to sub %ax %sp in probestack when arch is x86. // +// FIXME(abi_custom): This function is unsafe because it uses a custom ABI, +// it does not actually match `extern "C"`. 
+// // REF: Rust commit(74e80468347) // rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805 // Comments in LLVM: From b6eb4f9c3a8c1f2228322095e83c56ea226d0a28 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 4 Jun 2025 01:31:34 +0200 Subject: [PATCH 2/4] use `#[naked]` for `__rust_probestack` --- compiler/rustc_codegen_llvm/src/attributes.rs | 5 +- .../compiler-builtins/src/lib.rs | 1 + .../compiler-builtins/src/probestack.rs | 129 +++++------------- 3 files changed, 36 insertions(+), 99 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs index 443c2eace554d..27fd09745ff08 100644 --- a/compiler/rustc_codegen_llvm/src/attributes.rs +++ b/compiler/rustc_codegen_llvm/src/attributes.rs @@ -5,6 +5,7 @@ use rustc_hir::def_id::DefId; use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, PatchableFunctionEntry}; use rustc_middle::ty::{self, TyCtxt}; use rustc_session::config::{BranchProtection, FunctionReturn, OptLevel, PAuthKey, PacRet}; +use rustc_symbol_mangling::mangle_internal_symbol; use rustc_target::spec::{FramePointer, SanitizerSet, StackProbeType, StackProtector}; use smallvec::SmallVec; @@ -256,11 +257,11 @@ fn probestack_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> { StackProbeType::Inline => "inline-asm", // Flag our internal `__rust_probestack` function as the stack probe symbol. // This is defined in the `compiler-builtins` crate for each architecture. - StackProbeType::Call => "__rust_probestack", + StackProbeType::Call => &mangle_internal_symbol(cx.tcx, "__rust_probestack"), // Pick from the two above based on the LLVM version. 
StackProbeType::InlineOrCall { min_llvm_version_for_inline } => { if llvm_util::get_version() < min_llvm_version_for_inline { - "__rust_probestack" + &mangle_internal_symbol(cx.tcx, "__rust_probestack") } else { "inline-asm" } diff --git a/library/compiler-builtins/compiler-builtins/src/lib.rs b/library/compiler-builtins/compiler-builtins/src/lib.rs index 6a6b28067e8c8..6549d4cef9eda 100644 --- a/library/compiler-builtins/compiler-builtins/src/lib.rs +++ b/library/compiler-builtins/compiler-builtins/src/lib.rs @@ -8,6 +8,7 @@ #![feature(linkage)] #![feature(naked_functions)] #![feature(repr_simd)] +#![feature(rustc_attrs)] #![cfg_attr(f16_enabled, feature(f16))] #![cfg_attr(f128_enabled, feature(f128))] #![no_builtins] diff --git a/library/compiler-builtins/compiler-builtins/src/probestack.rs b/library/compiler-builtins/compiler-builtins/src/probestack.rs index 16faaa67f8adb..e9a26dff188de 100644 --- a/library/compiler-builtins/compiler-builtins/src/probestack.rs +++ b/library/compiler-builtins/compiler-builtins/src/probestack.rs @@ -49,79 +49,6 @@ // We only define stack probing for these architectures today. #![cfg(any(target_arch = "x86_64", target_arch = "x86"))] -// SAFETY: defined in this module. -// FIXME(extern_custom): the ABI is not correct. -unsafe extern "C" { - pub fn __rust_probestack(); -} - -// A wrapper for our implementation of __rust_probestack, which allows us to -// keep the assembly inline while controlling all CFI directives in the assembly -// emitted for the function. -// -// This is the ELF version. -#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .pushsection .text.__rust_probestack - .globl __rust_probestack - .type __rust_probestack, @function - .hidden __rust_probestack - __rust_probestack: - ", - $body, - " - .size __rust_probestack, . 
- __rust_probestack - .popsection - " - ) - }; -} - -#[cfg(all(target_os = "uefi", target_arch = "x86_64"))] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .globl __rust_probestack - __rust_probestack: - ", - $body - ) - }; -} - -// Same as above, but for Mach-O. Note that the triple underscore -// is deliberate -#[cfg(target_vendor = "apple")] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .globl ___rust_probestack - ___rust_probestack: - ", - $body - ) - }; -} - -// In UEFI x86 arch, triple underscore is deliberate. -#[cfg(all(target_os = "uefi", target_arch = "x86"))] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .globl ___rust_probestack - ___rust_probestack: - ", - $body - ) - }; -} - // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // @@ -136,8 +63,10 @@ macro_rules! define_rust_probestack { target_arch = "x86_64", not(all(target_env = "sgx", target_vendor = "fortanix")) ))] -core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[rustc_std_internal_symbol] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc pushq %rbp @@ -187,10 +116,10 @@ core::arch::global_asm!( .cfi_adjust_cfa_offset -8 ret .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} // This function is the same as above, except that some instructions are // [manually patched for LVI]. 
@@ -200,8 +129,10 @@ core::arch::global_asm!( target_arch = "x86_64", all(target_env = "sgx", target_vendor = "fortanix") ))] -core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[no_mangle] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc pushq %rbp @@ -253,10 +184,10 @@ core::arch::global_asm!( lfence jmp *%r11 .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} #[cfg(all(target_arch = "x86", not(target_os = "uefi")))] // This is the same as x86_64 above, only translated for 32-bit sizes. Note @@ -267,8 +198,10 @@ core::arch::global_asm!( // it does not actually match `extern "C"`. // // The ABI here is the same as x86_64, except everything is 32-bits large. -core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[rustc_std_internal_symbol] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc push %ebp @@ -299,10 +232,10 @@ core::arch::global_asm!( .cfi_adjust_cfa_offset -4 ret .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} #[cfg(all(target_arch = "x86", target_os = "uefi"))] // UEFI target is windows like target. LLVM will do _chkstk things like windows. @@ -318,8 +251,10 @@ core::arch::global_asm!( // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp // themselves. 
-core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[rustc_std_internal_symbol] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc push %ebp @@ -355,7 +290,7 @@ core::arch::global_asm!( .cfi_adjust_cfa_offset -4 ret .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} From 45d649e2ea83416cbbcf52e4053163b31c718ed0 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 4 Jun 2025 01:32:04 +0200 Subject: [PATCH 3/4] merge the sgx/fortanix `__rust_probestack` into the general `x86_64` one --- .../compiler-builtins/src/probestack.rs | 96 +++++-------------- 1 file changed, 23 insertions(+), 73 deletions(-) diff --git a/library/compiler-builtins/compiler-builtins/src/probestack.rs b/library/compiler-builtins/compiler-builtins/src/probestack.rs index e9a26dff188de..2375107e3d5c2 100644 --- a/library/compiler-builtins/compiler-builtins/src/probestack.rs +++ b/library/compiler-builtins/compiler-builtins/src/probestack.rs @@ -57,15 +57,31 @@ // // The ABI here is that the stack frame size is located in `%rax`. Upon // return we're not supposed to modify `%rsp` or `%rax`. -// -// Any changes to this function should be replicated to the SGX version below. -#[cfg(all( - target_arch = "x86_64", - not(all(target_env = "sgx", target_vendor = "fortanix")) -))] +#[cfg(target_arch = "x86_64")] #[unsafe(naked)] #[rustc_std_internal_symbol] pub unsafe extern "C" fn __rust_probestack() { + #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))] + macro_rules! ret { + () => { + "ret" + }; + } + + #[cfg(all(target_env = "sgx", target_vendor = "fortanix"))] + macro_rules! ret { + // for this target, [manually patch for LVI]. 
+ // + // [manually patch for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions + () => { + " + pop %r11 + lfence + jmp *%r11 + " + }; + } + core::arch::naked_asm!( " .cfi_startproc @@ -114,75 +130,9 @@ pub unsafe extern "C" fn __rust_probestack() { leave .cfi_def_cfa_register %rsp .cfi_adjust_cfa_offset -8 - ret - .cfi_endproc ", - options(att_syntax) - ) -} - -// This function is the same as above, except that some instructions are -// [manually patched for LVI]. -// -// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions -#[cfg(all( - target_arch = "x86_64", - all(target_env = "sgx", target_vendor = "fortanix") -))] -#[unsafe(naked)] -#[no_mangle] -pub unsafe extern "C" fn __rust_probestack() { - core::arch::naked_asm!( + ret!(), " - .cfi_startproc - pushq %rbp - .cfi_adjust_cfa_offset 8 - .cfi_offset %rbp, -16 - movq %rsp, %rbp - .cfi_def_cfa_register %rbp - - mov %rax,%r11 // duplicate %rax as we're clobbering %r11 - - // Main loop, taken in one page increments. We're decrementing rsp by - // a page each time until there's less than a page remaining. We're - // guaranteed that this function isn't called unless there's more than a - // page needed. - // - // Note that we're also testing against `8(%rsp)` to account for the 8 - // bytes pushed on the stack orginally with our return address. Using - // `8(%rsp)` simulates us testing the stack pointer in the caller's - // context. - - // It's usually called when %rax >= 0x1000, but that's not always true. - // Dynamic stack allocation, which is needed to implement unsized - // rvalues, triggers stackprobe even if %rax < 0x1000. - // Thus we have to check %r11 first to avoid segfault. 
- cmp $0x1000,%r11 - jna 3f -2: - sub $0x1000,%rsp - test %rsp,8(%rsp) - sub $0x1000,%r11 - cmp $0x1000,%r11 - ja 2b - -3: - // Finish up the last remaining stack space requested, getting the last - // bits out of r11 - sub %r11,%rsp - test %rsp,8(%rsp) - - // Restore the stack pointer to what it previously was when entering - // this function. The caller will readjust the stack pointer after we - // return. - add %rax,%rsp - - leave - .cfi_def_cfa_register %rsp - .cfi_adjust_cfa_offset -8 - pop %r11 - lfence - jmp *%r11 .cfi_endproc ", options(att_syntax) From b030442eb692cdbc30d13613db778aefe7397fa7 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 4 Jun 2025 01:33:16 +0200 Subject: [PATCH 4/4] indent the probestack inline assembly --- .../compiler-builtins/src/probestack.rs | 220 +++++++++--------- 1 file changed, 110 insertions(+), 110 deletions(-) diff --git a/library/compiler-builtins/compiler-builtins/src/probestack.rs b/library/compiler-builtins/compiler-builtins/src/probestack.rs index 2375107e3d5c2..1441fd73b8d6f 100644 --- a/library/compiler-builtins/compiler-builtins/src/probestack.rs +++ b/library/compiler-builtins/compiler-builtins/src/probestack.rs @@ -84,56 +84,56 @@ pub unsafe extern "C" fn __rust_probestack() { core::arch::naked_asm!( " - .cfi_startproc - pushq %rbp - .cfi_adjust_cfa_offset 8 - .cfi_offset %rbp, -16 - movq %rsp, %rbp - .cfi_def_cfa_register %rbp - - mov %rax,%r11 // duplicate %rax as we're clobbering %r11 - - // Main loop, taken in one page increments. We're decrementing rsp by - // a page each time until there's less than a page remaining. We're - // guaranteed that this function isn't called unless there's more than a - // page needed. - // - // Note that we're also testing against `8(%rsp)` to account for the 8 - // bytes pushed on the stack orginally with our return address. Using - // `8(%rsp)` simulates us testing the stack pointer in the caller's - // context. 
- - // It's usually called when %rax >= 0x1000, but that's not always true. - // Dynamic stack allocation, which is needed to implement unsized - // rvalues, triggers stackprobe even if %rax < 0x1000. - // Thus we have to check %r11 first to avoid segfault. - cmp $0x1000,%r11 - jna 3f -2: - sub $0x1000,%rsp - test %rsp,8(%rsp) - sub $0x1000,%r11 - cmp $0x1000,%r11 - ja 2b - -3: - // Finish up the last remaining stack space requested, getting the last - // bits out of r11 - sub %r11,%rsp - test %rsp,8(%rsp) - - // Restore the stack pointer to what it previously was when entering - // this function. The caller will readjust the stack pointer after we - // return. - add %rax,%rsp - - leave - .cfi_def_cfa_register %rsp - .cfi_adjust_cfa_offset -8 + .cfi_startproc + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 + + // Main loop, taken in one page increments. We're decrementing rsp by + // a page each time until there's less than a page remaining. We're + // guaranteed that this function isn't called unless there's more than a + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack originally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. + + // It's usually called when %rax >= 0x1000, but that's not always true. + // Dynamic stack allocation, which is needed to implement unsized + // rvalues, triggers stackprobe even if %rax < 0x1000. + // Thus we have to check %r11 first to avoid segfault. 
+ cmp $0x1000,%r11 + jna 3f + 2: + sub $0x1000,%rsp + test %rsp,8(%rsp) + sub $0x1000,%r11 + cmp $0x1000,%r11 + ja 2b + + 3: + // Finish up the last remaining stack space requested, getting the last + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) + + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. + add %rax,%rsp + + leave + .cfi_def_cfa_register %rsp + .cfi_adjust_cfa_offset -8 ", ret!(), " - .cfi_endproc + .cfi_endproc ", options(att_syntax) ) @@ -153,35 +153,35 @@ pub unsafe extern "C" fn __rust_probestack() { pub unsafe extern "C" fn __rust_probestack() { core::arch::naked_asm!( " - .cfi_startproc - push %ebp - .cfi_adjust_cfa_offset 4 - .cfi_offset %ebp, -8 - mov %esp, %ebp - .cfi_def_cfa_register %ebp - push %ecx - mov %eax,%ecx - - cmp $0x1000,%ecx - jna 3f -2: - sub $0x1000,%esp - test %esp,8(%esp) - sub $0x1000,%ecx - cmp $0x1000,%ecx - ja 2b - -3: - sub %ecx,%esp - test %esp,8(%esp) - - add %eax,%esp - pop %ecx - leave - .cfi_def_cfa_register %esp - .cfi_adjust_cfa_offset -4 - ret - .cfi_endproc + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f + 2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + + 3: + sub %ecx,%esp + test %esp,8(%esp) + + add %eax,%esp + pop %ecx + leave + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc ", options(att_syntax) ) @@ -206,40 +206,40 @@ pub unsafe extern "C" fn __rust_probestack() { pub unsafe extern "C" fn __rust_probestack() { core::arch::naked_asm!( " - .cfi_startproc - push %ebp - .cfi_adjust_cfa_offset 4 - .cfi_offset %ebp, -8 - mov %esp, %ebp - .cfi_def_cfa_register %ebp - push %ecx - push %edx - mov %eax,%ecx - - cmp $0x1000,%ecx - jna 3f -2: - sub $0x1000,%esp - test %esp,8(%esp) - sub $0x1000,%ecx - cmp 
$0x1000,%ecx - ja 2b - -3: - sub %ecx,%esp - test %esp,8(%esp) - mov 4(%ebp),%edx - mov %edx, 12(%esp) - add %eax,%esp - pop %edx - pop %ecx - leave - - sub %eax, %esp - .cfi_def_cfa_register %esp - .cfi_adjust_cfa_offset -4 - ret - .cfi_endproc + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + push %edx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f + 2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + + 3: + sub %ecx,%esp + test %esp,8(%esp) + mov 4(%ebp),%edx + mov %edx, 12(%esp) + add %eax,%esp + pop %edx + pop %ecx + leave + + sub %eax, %esp + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc ", options(att_syntax) )