From 436e4fb647bca08bd5cb7eea6f8450bb56515c10 Mon Sep 17 00:00:00 2001 From: Flakebi Date: Thu, 2 Jan 2025 13:10:11 +0100 Subject: [PATCH 1/2] Cast global variables to default address space Pointers for variables all need to be in the same address space for correct compilation. Therefore ensure that even if a global variable is created in a different address space, it is casted to the default address space before its value is used. This is necessary for the amdgpu target and others where the default address space for global variables is not 0. For example `core` does not compile in debug mode when not casting the address space to the default one because it tries to emit the following (simplified) LLVM IR, containing a type mismatch: ```llvm @alloc_0 = addrspace(1) constant <{ [6 x i8] }> <{ [6 x i8] c"bit.rs" }>, align 1 @alloc_1 = addrspace(1) constant <{ ptr }> <{ ptr addrspace(1) @alloc_0 }>, align 8 ; ^ here a struct containing a `ptr` is needed, but it is created using a `ptr addrspace(1)` ``` For this to compile, we need to insert a constant `addrspacecast` before we use a global variable: ```llvm @alloc_0 = addrspace(1) constant <{ [6 x i8] }> <{ [6 x i8] c"bit.rs" }>, align 1 @alloc_1 = addrspace(1) constant <{ ptr }> <{ ptr addrspacecast (ptr addrspace(1) @alloc_0 to ptr) }>, align 8 ``` As vtables are global variables as well, they are also created with an `addrspacecast`. In the SSA backend, after a vtable global is created, metadata is added to it. To add metadata, we need the non-casted global variable. Therefore we strip away an addrspacecast if there is one, to get the underlying global. --- compiler/rustc_codegen_llvm/src/builder.rs | 4 +- compiler/rustc_codegen_llvm/src/common.rs | 7 +- compiler/rustc_codegen_llvm/src/consts.rs | 48 ++++++++---- .../src/debuginfo/metadata.rs | 15 ++++ compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 77 +++++++++++++++++++ 5 files changed, 130 insertions(+), 21 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index ab0e43e60a4f5..1b99fea8d5154 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -1225,7 +1225,9 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { impl<'ll> StaticBuilderMethods for Builder<'_, 'll, '_> { fn get_static(&mut self, def_id: DefId) -> &'ll Value { // Forward to the `get_static` method of `CodegenCx` - self.cx().get_static(def_id) + let s = self.cx().get_static(def_id); + // Cast to default address space if statics are in a different addrspace + self.cx().const_pointercast(s, self.type_ptr()) } } diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs index adfe8aeb5c539..bd792c02810c6 100644 --- a/compiler/rustc_codegen_llvm/src/common.rs +++ b/compiler/rustc_codegen_llvm/src/common.rs @@ -221,6 +221,7 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { llvm::LLVMSetUnnamedAddress(g, llvm::UnnamedAddr::Global); } llvm::set_linkage(g, llvm::Linkage::InternalLinkage); + let g = self.const_pointercast(g, self.type_ptr()); (s.to_owned(), g) }) .1; @@ -285,7 +286,7 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { let alloc = alloc.inner(); let value = match alloc.mutability { Mutability::Mut => self.static_addr_of_mut(init, alloc.align, None), - _ => self.static_addr_of(init, alloc.align, None), + _ => self.static_addr_of_impl(init, alloc.align, None), }; if !self.sess().fewer_names() && llvm::get_value_name(value).is_empty() { @@ -311,7 +312,7 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { .global_alloc(self.tcx.vtable_allocation((ty, dyn_ty.principal()))) .unwrap_memory(); let init = const_alloc_to_llvm(self, alloc, /*static*/ false); - let value = self.static_addr_of(init, alloc.inner().align, None); + let value = self.static_addr_of_impl(init, alloc.inner().align, None); (value, AddressSpace::DATA) } GlobalAlloc::Static(def_id) => { @@ -323,7 +324,7 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { let llval = unsafe { llvm::LLVMConstInBoundsGEP2( self.type_i8(), - self.const_bitcast(base_addr, self.type_ptr_ext(base_addr_space)), + self.const_pointercast(base_addr, self.type_ptr_ext(base_addr_space)), &self.const_usize(offset.bytes()), 1, ) diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs index c7114480d8bdf..b88fd133e8e8a 100644 --- a/compiler/rustc_codegen_llvm/src/consts.rs +++ b/compiler/rustc_codegen_llvm/src/consts.rs @@ -210,6 +210,10 @@ impl<'ll> CodegenCx<'ll, '_> { unsafe { llvm::LLVMConstBitCast(val, ty) } } + pub(crate) fn const_pointercast(&self, val: &'ll Value, ty: &'ll Type) -> &'ll Value { + unsafe { llvm::LLVMConstPointerCast(val, ty) } + } + pub(crate) fn static_addr_of_mut( &self, cv: &'ll Value, @@ -233,6 +237,31 @@ impl<'ll> CodegenCx<'ll, '_> { gv } + pub(crate) fn static_addr_of_impl( + &self, + cv: &'ll Value, + align: Align, + kind: Option<&str>, + ) -> &'ll Value { + if let Some(&gv) = self.const_globals.borrow().get(&cv) { + unsafe { + // Upgrade the alignment in cases where the same constant is used with different + // alignment requirements + let llalign = align.bytes() as u32; + if llalign > llvm::LLVMGetAlignment(gv) { + llvm::LLVMSetAlignment(gv, llalign); + } + } + return gv; + } + let gv = self.static_addr_of_mut(cv, align, kind); + unsafe { + llvm::LLVMSetGlobalConstant(gv, True); + } + self.const_globals.borrow_mut().insert(cv, gv); + gv + } + #[instrument(level = "debug", skip(self))] pub(crate) fn get_static(&self, def_id: DefId) -> &'ll Value { let instance = Instance::mono(self.tcx, def_id); @@ -506,23 +535,8 @@ impl<'ll> CodegenCx<'ll, '_> { impl<'ll> StaticCodegenMethods for CodegenCx<'ll, '_> { fn static_addr_of(&self, cv: &'ll Value, align: Align, kind: Option<&str>) -> &'ll Value { - if let Some(&gv) = self.const_globals.borrow().get(&cv) { - unsafe { - // Upgrade the alignment in cases where the same constant is used with different - // alignment requirements - let llalign = align.bytes() as u32; - if llalign > llvm::LLVMGetAlignment(gv) { - llvm::LLVMSetAlignment(gv, llalign); - } - } - return gv; - } - let gv = self.static_addr_of_mut(cv, align, kind); - unsafe { - llvm::LLVMSetGlobalConstant(gv, True); - } - self.const_globals.borrow_mut().insert(cv, gv); - gv + let gv = self.static_addr_of_impl(cv, align, kind); + self.const_pointercast(gv, self.type_ptr()) } fn codegen_static(&self, def_id: DefId) { diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs index 40248a9009ae7..e4da540da5cd3 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs @@ -1500,6 +1500,18 @@ fn build_vtable_type_di_node<'ll, 'tcx>( .di_node } +fn find_vtable_behind_cast<'ll>(vtable: &'ll Value) -> &'ll Value { + // The vtable is a global variable, which may be behind an addrspacecast. + unsafe { + if let Some(c) = llvm::LLVMIsAConstantExpr(vtable) { + if llvm::LLVMGetConstOpcode(c) == llvm::Opcode::AddrSpaceCast { + return llvm::LLVMGetOperand(c, 0).unwrap(); + } + } + } + vtable +} + pub(crate) fn apply_vcall_visibility_metadata<'ll, 'tcx>( cx: &CodegenCx<'ll, 'tcx>, ty: Ty<'tcx>, @@ -1520,6 +1532,7 @@ pub(crate) fn apply_vcall_visibility_metadata<'ll, 'tcx>( let Some(trait_ref) = trait_ref else { return }; + let vtable = find_vtable_behind_cast(vtable); let trait_ref_self = trait_ref.with_self_ty(cx.tcx, ty); let trait_ref_self = cx.tcx.erase_regions(trait_ref_self); let trait_def_id = trait_ref_self.def_id(); @@ -1593,6 +1606,8 @@ pub(crate) fn create_vtable_di_node<'ll, 'tcx>( return; } + let vtable = find_vtable_behind_cast(vtable); + // When full debuginfo is enabled, we want to try and prevent vtables from being // merged. Otherwise debuggers will have a hard time mapping from dyn pointer // to concrete type. diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 472d4a3a72b3f..8ae9c1e501706 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -660,6 +660,79 @@ pub enum MemoryEffects { InaccessibleMemOnly, } +/// LLVMOpcode +#[derive(Copy, Clone, PartialEq, Eq)] +#[repr(C)] +pub enum Opcode { + Ret = 1, + Br = 2, + Switch = 3, + IndirectBr = 4, + Invoke = 5, + Unreachable = 7, + CallBr = 67, + FNeg = 66, + Add = 8, + FAdd = 9, + Sub = 10, + FSub = 11, + Mul = 12, + FMul = 13, + UDiv = 14, + SDiv = 15, + FDiv = 16, + URem = 17, + SRem = 18, + FRem = 19, + Shl = 20, + LShr = 21, + AShr = 22, + And = 23, + Or = 24, + Xor = 25, + Alloca = 26, + Load = 27, + Store = 28, + GetElementPtr = 29, + Trunc = 30, + ZExt = 31, + SExt = 32, + FPToUI = 33, + FPToSI = 34, + UIToFP = 35, + SIToFP = 36, + FPTrunc = 37, + FPExt = 38, + PtrToInt = 39, + IntToPtr = 40, + BitCast = 41, + AddrSpaceCast = 60, + ICmp = 42, + FCmp = 43, + PHI = 44, + Call = 45, + Select = 46, + UserOp1 = 47, + UserOp2 = 48, + VAArg = 49, + ExtractElement = 50, + InsertElement = 51, + ShuffleVector = 52, + ExtractValue = 53, + InsertValue = 54, + Freeze = 68, + Fence = 55, + AtomicCmpXchg = 56, + AtomicRMW = 57, + Resume = 58, + LandingPad = 59, + CleanupRet = 61, + CatchRet = 62, + CatchPad = 63, + CleanupPad = 64, + CatchSwitch = 65, +} + unsafe extern "C" { type Opaque; } @@ -975,7 +1048,10 @@ unsafe extern "C" { pub fn LLVMConstPtrToInt<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value; pub fn LLVMConstIntToPtr<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value; pub fn LLVMConstBitCast<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value; + pub fn LLVMConstPointerCast<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value; pub fn LLVMGetAggregateElement(ConstantVal: &Value, Idx: c_uint) -> Option<&Value>; + pub fn LLVMGetConstOpcode(ConstantVal: &Value) -> Opcode; + pub fn LLVMIsAConstantExpr(Val: &Value) -> Option<&Value>; // Operations on global variables, functions, and aliases (globals) pub fn LLVMIsDeclaration(Global: &Value) -> Bool; @@ -1032,6 +1108,7 @@ unsafe extern "C" { // Operations on instructions pub fn LLVMIsAInstruction(Val: &Value) -> Option<&Value>; pub fn LLVMGetFirstBasicBlock(Fn: &Value) -> &BasicBlock; + pub fn LLVMGetOperand(Val: &Value, Index: c_uint) -> Option<&Value>; // Operations on call sites pub fn LLVMSetInstructionCallConv(Instr: &Value, CC: c_uint); From b06e840d9e8e27ac1f79be5e6d2778455146f665 Mon Sep 17 00:00:00 2001 From: Flakebi Date: Fri, 24 Jan 2025 00:37:05 +0100 Subject: [PATCH 2/2] Add comments about address spaces --- compiler/rustc_codegen_llvm/src/builder.rs | 2 +- compiler/rustc_codegen_llvm/src/common.rs | 2 ++ compiler/rustc_codegen_llvm/src/consts.rs | 13 +++++++++++++ .../rustc_codegen_llvm/src/debuginfo/metadata.rs | 10 ++++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 1b99fea8d5154..4d572dc56fb21 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -1226,7 +1226,7 @@ impl<'ll> StaticBuilderMethods for Builder<'_, 'll, '_> { fn get_static(&mut self, def_id: DefId) -> &'ll Value { // Forward to the `get_static` method of `CodegenCx` let s = self.cx().get_static(def_id); - // Cast to default address space if statics are in a different addrspace + // Cast to default address space if globals are in a different addrspace self.cx().const_pointercast(s, self.type_ptr()) } } diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs index bd792c02810c6..5031df06b05b0 100644 --- a/compiler/rustc_codegen_llvm/src/common.rs +++ b/compiler/rustc_codegen_llvm/src/common.rs @@ -221,6 +221,7 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { llvm::LLVMSetUnnamedAddress(g, llvm::UnnamedAddr::Global); } llvm::set_linkage(g, llvm::Linkage::InternalLinkage); + // Cast to default address space if globals are in a different addrspace let g = self.const_pointercast(g, self.type_ptr()); (s.to_owned(), g) }) @@ -324,6 +325,7 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { let llval = unsafe { llvm::LLVMConstInBoundsGEP2( self.type_i8(), + // Cast to the required address space if necessary self.const_pointercast(base_addr, self.type_ptr_ext(base_addr_space)), &self.const_usize(offset.bytes()), 1, diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs index b88fd133e8e8a..771ebf2057f9a 100644 --- a/compiler/rustc_codegen_llvm/src/consts.rs +++ b/compiler/rustc_codegen_llvm/src/consts.rs @@ -214,6 +214,10 @@ impl<'ll> CodegenCx<'ll, '_> { unsafe { llvm::LLVMConstPointerCast(val, ty) } } + /// Create a global variable. + /// + /// The returned global variable is a pointer in the default address space for globals. + /// Fails if a symbol with the given name already exists. pub(crate) fn static_addr_of_mut( &self, cv: &'ll Value, @@ -237,6 +241,9 @@ impl<'ll> CodegenCx<'ll, '_> { gv } + /// Create a global constant. + /// + /// The returned global variable is a pointer in the default address space for globals. pub(crate) fn static_addr_of_impl( &self, cv: &'ll Value, @@ -534,8 +541,14 @@ impl<'ll> CodegenCx<'ll, '_> { } impl<'ll> StaticCodegenMethods for CodegenCx<'ll, '_> { + /// Get a pointer to a global variable. + /// + /// The pointer will always be in the default address space. If global variables default to a + /// different address space, an addrspacecast is inserted. fn static_addr_of(&self, cv: &'ll Value, align: Align, kind: Option<&str>) -> &'ll Value { let gv = self.static_addr_of_impl(cv, align, kind); + // static_addr_of_impl returns the bare global variable, which might not be in the default + // address space. Cast to the default address space if necessary. self.const_pointercast(gv, self.type_ptr()) } diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs index e4da540da5cd3..b9e3a6975d658 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs @@ -1500,6 +1500,14 @@ fn build_vtable_type_di_node<'ll, 'tcx>( .di_node } +/// Get the global variable for the vtable. +/// +/// When using global variables, we may have created an addrspacecast to get a pointer to the +/// default address space if global variables are created in a different address space. +/// For modifying the vtable, we need the real global variable. This function accepts either a +/// global variable (which is simply returned), or an addrspacecast constant expression. +/// If the given value is an addrspacecast, the cast is removed and the global variable behind +/// the cast is returned. fn find_vtable_behind_cast<'ll>(vtable: &'ll Value) -> &'ll Value { // The vtable is a global variable, which may be behind an addrspacecast. unsafe { @@ -1532,6 +1540,7 @@ pub(crate) fn apply_vcall_visibility_metadata<'ll, 'tcx>( let Some(trait_ref) = trait_ref else { return }; + // Unwrap potential addrspacecast let vtable = find_vtable_behind_cast(vtable); let trait_ref_self = trait_ref.with_self_ty(cx.tcx, ty); let trait_ref_self = cx.tcx.erase_regions(trait_ref_self); @@ -1606,6 +1615,7 @@ pub(crate) fn create_vtable_di_node<'ll, 'tcx>( return; } + // Unwrap potential addrspacecast let vtable = find_vtable_behind_cast(vtable); // When full debuginfo is enabled, we want to try and prevent vtables from being