From ff0fa8f1d16763f178df488b473ec2ab37da7429 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Thu, 31 Jul 2014 15:05:08 +0200 Subject: [PATCH] Use a versioning scheme for bytecode objects in rlibs. Before this commit, the LLVM IR of exported items was simply zip-compressed and stored as an object file inside rlib archives. This commit adds a header to this "object" containing a file identifier and a format version number so the compiler can deal with changes in the way bytecode objects are stored within rlibs. While updating the format of bytecode objects, this commit also works around a problem in LLDB, which could not handle odd-sized objects within archives before mid-2014. --- src/librustc/back/link.rs | 103 ++++++++++++++++++++++++++++++++------ src/librustc/back/lto.rs | 103 +++++++++++++++++++++++++++++--------- 2 files changed, 168 insertions(+), 38 deletions(-) diff --git a/src/librustc/back/link.rs b/src/librustc/back/link.rs index 0cf884eccbcb3..0909765414964 100644 --- a/src/librustc/back/link.rs +++ b/src/librustc/back/link.rs @@ -32,6 +32,7 @@ use std::char; use std::collections::HashSet; use std::io::{fs, TempDir, Command}; use std::io; +use std::mem; use std::ptr; use std::str; use std::string::String; @@ -45,6 +46,36 @@ use syntax::attr::AttrMetaMethods; use syntax::codemap::Span; use syntax::parse::token; +// RLIB LLVM-BYTECODE OBJECT LAYOUT +// Version 1 +// Bytes Data +// 0..10 "RUST_OBJECT" encoded in ASCII +// 11..14 format version as little-endian u32 +// 15..22 size in bytes of deflate compressed LLVM bitcode as +// little-endian u64 +// 23.. compressed LLVM bitcode + +// This is the "magic number" expected at the beginning of a LLVM bytecode +// object in an rlib. 
+pub static RLIB_BYTECODE_OBJECT_MAGIC: &'static [u8] = b"RUST_OBJECT"; + +// The version number this compiler will write to bytecode objects in rlibs +pub static RLIB_BYTECODE_OBJECT_VERSION: u32 = 1; + +// The offset in bytes the bytecode object format version number can be found at +pub static RLIB_BYTECODE_OBJECT_VERSION_OFFSET: uint = 11; + +// The offset in bytes the size of the compressed bytecode can be found at in +// format version 1 +pub static RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET: uint = + RLIB_BYTECODE_OBJECT_VERSION_OFFSET + 4; + +// The offset in bytes the compressed LLVM bytecode can be found at in format +// version 1 +pub static RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET: uint = + RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET + 8; + + #[deriving(Clone, PartialEq, PartialOrd, Ord, Eq)] pub enum OutputType { OutputTypeBitcode, @@ -1103,28 +1134,44 @@ fn link_rlib<'a>(sess: &'a Session, // is never exactly 16 bytes long by adding a 16 byte extension to // it. This is to work around a bug in LLDB that would cause it to // crash if the name of a file in an archive was exactly 16 bytes. 
- let bc = obj_filename.with_extension("bc"); - let bc_deflated = obj_filename.with_extension("bytecode.deflate"); - match fs::File::open(&bc).read_to_end().and_then(|data| { - fs::File::create(&bc_deflated) - .write(match flate::deflate_bytes(data.as_slice()) { - Some(compressed) => compressed, - None => sess.fatal("failed to compress bytecode") - }.as_slice()) - }) { + let bc_filename = obj_filename.with_extension("bc"); + let bc_deflated_filename = obj_filename.with_extension("bytecode.deflate"); + + let bc_data = match fs::File::open(&bc_filename).read_to_end() { + Ok(buffer) => buffer, + Err(e) => sess.fatal(format!("failed to read bytecode: {}", + e).as_slice()) + }; + + let bc_data_deflated = match flate::deflate_bytes(bc_data.as_slice()) { + Some(compressed) => compressed, + None => sess.fatal(format!("failed to compress bytecode from {}", + bc_filename.display()).as_slice()) + }; + + let mut bc_file_deflated = match fs::File::create(&bc_deflated_filename) { + Ok(file) => file, + Err(e) => { + sess.fatal(format!("failed to create compressed bytecode \ + file: {}", e).as_slice()) + } + }; + + match write_rlib_bytecode_object_v1(&mut bc_file_deflated, + bc_data_deflated.as_slice()) { Ok(()) => {} Err(e) => { sess.err(format!("failed to write compressed bytecode: \ - {}", - e).as_slice()); + {}", e).as_slice()); sess.abort_if_errors() } - } - ab.add_file(&bc_deflated).unwrap(); - remove(sess, &bc_deflated); + }; + + ab.add_file(&bc_deflated_filename).unwrap(); + remove(sess, &bc_deflated_filename); if !sess.opts.cg.save_temps && !sess.opts.output_types.contains(&OutputTypeBitcode) { - remove(sess, &bc); + remove(sess, &bc_filename); } } @@ -1134,6 +1181,32 @@ fn link_rlib<'a>(sess: &'a Session, ab } +fn write_rlib_bytecode_object_v1<T: Writer>(writer: &mut T, + bc_data_deflated: &[u8]) + -> ::std::io::IoResult<()> { + let bc_data_deflated_size: u64 = bc_data_deflated.as_slice().len() as u64; + + try! { writer.write(RLIB_BYTECODE_OBJECT_MAGIC) }; + try! 
{ writer.write_le_u32(1) }; + try! { writer.write_le_u64(bc_data_deflated_size) }; + try! { writer.write(bc_data_deflated.as_slice()) }; + + let number_of_bytes_written_so_far = + RLIB_BYTECODE_OBJECT_MAGIC.len() + // magic id + mem::size_of_val(&RLIB_BYTECODE_OBJECT_VERSION) + // version + mem::size_of_val(&bc_data_deflated_size) + // data size field + bc_data_deflated_size as uint; // actual data + + // If the number of bytes written to the object so far is odd, add a + // padding byte to make it even. This works around a crash bug in LLDB + // (see issue #15950) + if number_of_bytes_written_so_far % 2 == 1 { + try! { writer.write_u8(0) }; + } + + return Ok(()); +} + // Create a static archive // // This is essentially the same thing as an rlib, but it also involves adding diff --git a/src/librustc/back/lto.rs b/src/librustc/back/lto.rs index c51f1615d5980..4212513f56a40 100644 --- a/src/librustc/back/lto.rs +++ b/src/librustc/back/lto.rs @@ -20,6 +20,8 @@ use util::common::time; use libc; use flate; +use std::mem; + pub fn run(sess: &session::Session, llmod: ModuleRef, tm: TargetMachineRef, reachable: &[String]) { if sess.opts.cg.prefer_dynamic { @@ -57,28 +59,58 @@ pub fn run(sess: &session::Session, llmod: ModuleRef, let file = path.filename_str().unwrap(); let file = file.slice(3, file.len() - 5); // chop off lib/.rlib debug!("reading {}", file); - let bc = time(sess.time_passes(), - format!("read {}.bytecode.deflate", name).as_slice(), - (), - |_| { - archive.read(format!("{}.bytecode.deflate", - file).as_slice()) - }); - let bc = bc.expect("missing compressed bytecode in archive!"); - let bc = time(sess.time_passes(), - format!("inflate {}.bc", file).as_slice(), - (), - |_| { - match flate::inflate_bytes(bc) { - Some(bc) => bc, - None => { - sess.fatal(format!("failed to decompress \ - bc of `{}`", - name).as_slice()) - } - } - }); - let ptr = bc.as_slice().as_ptr(); + let bc_encoded = time(sess.time_passes(), + format!("read {}.bytecode.deflate", 
name).as_slice(), + (), + |_| { + archive.read(format!("{}.bytecode.deflate", + file).as_slice()) + }); + let bc_encoded = bc_encoded.expect("missing compressed bytecode in archive!"); + let bc_extractor = if is_versioned_bytecode_format(bc_encoded) { + |_| { + // Read the version + let version = extract_bytecode_format_version(bc_encoded); + + if version == 1 { + // The only version existing so far + let data_size = extract_compressed_bytecode_size_v1(bc_encoded); + let compressed_data = bc_encoded.slice( + link::RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET, + link::RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET + data_size as uint); + + match flate::inflate_bytes(compressed_data) { + Some(inflated) => inflated, + None => { + sess.fatal(format!("failed to decompress bc of `{}`", + name).as_slice()) + } + } + } else { + sess.fatal(format!("Unsupported bytecode format version {}", + version).as_slice()) + } + } + } else { + // the object must be in the old, pre-versioning format, so simply + // inflate everything and let LLVM decide if it can make sense of it + |_| { + match flate::inflate_bytes(bc_encoded) { + Some(bc) => bc, + None => { + sess.fatal(format!("failed to decompress bc of `{}`", + name).as_slice()) + } + } + } + }; + + let bc_decoded = time(sess.time_passes(), + format!("decode {}.bc", file).as_slice(), + (), + bc_extractor); + + let ptr = bc_decoded.as_slice().as_ptr(); debug!("linking {}", name); time(sess.time_passes(), format!("ll link {}", name).as_slice(), @@ -86,7 +118,7 @@ pub fn run(sess: &session::Session, llmod: ModuleRef, |()| unsafe { if !llvm::LLVMRustLinkInExternalBitcode(llmod, ptr as *const libc::c_char, - bc.len() as libc::size_t) { + bc_decoded.len() as libc::size_t) { link::llvm_err(sess, format!("failed to load bc of `{}`", name.as_slice())); @@ -137,3 +169,28 @@ pub fn run(sess: &session::Session, llmod: ModuleRef, } debug!("lto done"); } + +fn is_versioned_bytecode_format(bc: &[u8]) -> bool { + let magic_id_byte_count = 
link::RLIB_BYTECODE_OBJECT_MAGIC.len(); + return bc.len() > magic_id_byte_count && + bc.slice(0, magic_id_byte_count) == link::RLIB_BYTECODE_OBJECT_MAGIC; +} + +fn extract_bytecode_format_version(bc: &[u8]) -> u32 { + return read_from_le_bytes::<u32>(bc, link::RLIB_BYTECODE_OBJECT_VERSION_OFFSET); +} + +fn extract_compressed_bytecode_size_v1(bc: &[u8]) -> u64 { + return read_from_le_bytes::<u64>(bc, link::RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET); +} + +fn read_from_le_bytes<T: Int>(bytes: &[u8], position_in_bytes: uint) -> T { + let byte_data = bytes.slice(position_in_bytes, + position_in_bytes + mem::size_of::<T>()); + let data = unsafe { + *(byte_data.as_ptr() as *const T) + }; + + Int::from_le(data) +} +