From ff0fa8f1d16763f178df488b473ec2ab37da7429 Mon Sep 17 00:00:00 2001
From: Michael Woerister <michaelwoerister@posteo>
Date: Thu, 31 Jul 2014 15:05:08 +0200
Subject: [PATCH] Use a versioning scheme for bytecode objects in rlibs.

Before this commit, the LLVM IR of exported items was simply deflate-compressed and stored as an object file inside rlib archives. This commit adds a header to this "object" containing a file identifier and a format version number so the compiler can deal with changes in the way bytecode objects are stored within rlibs.

While updating the format of bytecode objects, this commit also works around a problem in LLDB which could not handle odd-sized objects within archives before mid-2014.
---
 src/librustc/back/link.rs | 103 ++++++++++++++++++++++++++++++++------
 src/librustc/back/lto.rs  | 103 +++++++++++++++++++++++++++++---------
 2 files changed, 168 insertions(+), 38 deletions(-)

diff --git a/src/librustc/back/link.rs b/src/librustc/back/link.rs
index 0cf884eccbcb3..0909765414964 100644
--- a/src/librustc/back/link.rs
+++ b/src/librustc/back/link.rs
@@ -32,6 +32,7 @@ use std::char;
 use std::collections::HashSet;
 use std::io::{fs, TempDir, Command};
 use std::io;
+use std::mem;
 use std::ptr;
 use std::str;
 use std::string::String;
@@ -45,6 +46,36 @@ use syntax::attr::AttrMetaMethods;
 use syntax::codemap::Span;
 use syntax::parse::token;
 
+// RLIB LLVM-BYTECODE OBJECT LAYOUT
+// Version 1
+// Bytes    Data
+// 0..10    "RUST_OBJECT" encoded in ASCII
+// 11..14   format version as little-endian u32
+// 15..22   size in bytes of deflate compressed LLVM bitcode as
+//          little-endian u64
+// 23..     compressed LLVM bitcode
+
+// This is the "magic number" expected at the beginning of an LLVM bytecode
+// object in an rlib.
+pub static RLIB_BYTECODE_OBJECT_MAGIC: &'static [u8] = b"RUST_OBJECT";
+
+// The version number this compiler will write to bytecode objects in rlibs
+pub static RLIB_BYTECODE_OBJECT_VERSION: u32 = 1;
+
+// The offset in bytes the bytecode object format version number can be found at
+pub static RLIB_BYTECODE_OBJECT_VERSION_OFFSET: uint = 11;
+
+// The offset in bytes the size of the compressed bytecode can be found at in
+// format version 1
+pub static RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET: uint =
+    RLIB_BYTECODE_OBJECT_VERSION_OFFSET + 4;
+
+// The offset in bytes the compressed LLVM bytecode can be found at in format
+// version 1
+pub static RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET: uint =
+    RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET + 8;
+
+
 #[deriving(Clone, PartialEq, PartialOrd, Ord, Eq)]
 pub enum OutputType {
     OutputTypeBitcode,
@@ -1103,28 +1134,44 @@ fn link_rlib<'a>(sess: &'a Session,
             // is never exactly 16 bytes long by adding a 16 byte extension to
             // it. This is to work around a bug in LLDB that would cause it to
             // crash if the name of a file in an archive was exactly 16 bytes.
-            let bc = obj_filename.with_extension("bc");
-            let bc_deflated = obj_filename.with_extension("bytecode.deflate");
-            match fs::File::open(&bc).read_to_end().and_then(|data| {
-                fs::File::create(&bc_deflated)
-                    .write(match flate::deflate_bytes(data.as_slice()) {
-                        Some(compressed) => compressed,
-                        None => sess.fatal("failed to compress bytecode")
-                     }.as_slice())
-            }) {
+            let bc_filename = obj_filename.with_extension("bc");
+            let bc_deflated_filename = obj_filename.with_extension("bytecode.deflate");
+
+            let bc_data = match fs::File::open(&bc_filename).read_to_end() {
+                Ok(buffer) => buffer,
+                Err(e) => sess.fatal(format!("failed to read bytecode: {}",
+                                             e).as_slice())
+            };
+
+            let bc_data_deflated = match flate::deflate_bytes(bc_data.as_slice()) {
+                Some(compressed) => compressed,
+                None => sess.fatal(format!("failed to compress bytecode from {}",
+                                           bc_filename.display()).as_slice())
+            };
+
+            let mut bc_file_deflated = match fs::File::create(&bc_deflated_filename) {
+                Ok(file) => file,
+                Err(e) => {
+                    sess.fatal(format!("failed to create compressed bytecode \
+                                        file: {}", e).as_slice())
+                }
+            };
+
+            match write_rlib_bytecode_object_v1(&mut bc_file_deflated,
+                                                bc_data_deflated.as_slice()) {
                 Ok(()) => {}
                 Err(e) => {
                     sess.err(format!("failed to write compressed bytecode: \
-                                      {}",
-                                     e).as_slice());
+                                      {}", e).as_slice());
                     sess.abort_if_errors()
                 }
-            }
-            ab.add_file(&bc_deflated).unwrap();
-            remove(sess, &bc_deflated);
+            };
+
+            ab.add_file(&bc_deflated_filename).unwrap();
+            remove(sess, &bc_deflated_filename);
             if !sess.opts.cg.save_temps &&
                !sess.opts.output_types.contains(&OutputTypeBitcode) {
-                remove(sess, &bc);
+                remove(sess, &bc_filename);
             }
         }
 
@@ -1134,6 +1181,32 @@ fn link_rlib<'a>(sess: &'a Session,
     ab
 }
 
+// Writes a version 1 bytecode object: magic id, format version, payload size,
+// payload, then one zero byte if the total would otherwise be odd.
+fn write_rlib_bytecode_object_v1<T: Writer>(writer: &mut T,
+                                            bc_data_deflated: &[u8])
+                                         -> ::std::io::IoResult<()> {
+    let bc_data_deflated_size: u64 = bc_data_deflated.len() as u64;
+
+    try! { writer.write(RLIB_BYTECODE_OBJECT_MAGIC) };
+    try! { writer.write_le_u32(RLIB_BYTECODE_OBJECT_VERSION) };
+    try! { writer.write_le_u64(bc_data_deflated_size) };
+    try! { writer.write(bc_data_deflated) };
+
+    let number_of_bytes_written_so_far =
+        RLIB_BYTECODE_OBJECT_MAGIC.len() +                // magic id
+        mem::size_of_val(&RLIB_BYTECODE_OBJECT_VERSION) + // version
+        mem::size_of_val(&bc_data_deflated_size) +        // data size field
+        bc_data_deflated_size as uint;                    // actual data
+
+    // Pad odd-sized objects with one zero byte to make them even; this works
+    // around a crash bug in LLDB (see issue #15950).
+    if number_of_bytes_written_so_far % 2 == 1 {
+        try! { writer.write_u8(0) };
+    }
+    Ok(())
+}
+
 // Create a static archive
 //
 // This is essentially the same thing as an rlib, but it also involves adding
diff --git a/src/librustc/back/lto.rs b/src/librustc/back/lto.rs
index c51f1615d5980..4212513f56a40 100644
--- a/src/librustc/back/lto.rs
+++ b/src/librustc/back/lto.rs
@@ -20,6 +20,8 @@ use util::common::time;
 use libc;
 use flate;
 
+use std::mem;
+
 pub fn run(sess: &session::Session, llmod: ModuleRef,
            tm: TargetMachineRef, reachable: &[String]) {
     if sess.opts.cg.prefer_dynamic {
@@ -57,28 +59,58 @@ pub fn run(sess: &session::Session, llmod: ModuleRef,
         let file = path.filename_str().unwrap();
         let file = file.slice(3, file.len() - 5); // chop off lib/.rlib
         debug!("reading {}", file);
-        let bc = time(sess.time_passes(),
-                      format!("read {}.bytecode.deflate", name).as_slice(),
-                      (),
-                      |_| {
-                          archive.read(format!("{}.bytecode.deflate",
-                                               file).as_slice())
-                      });
-        let bc = bc.expect("missing compressed bytecode in archive!");
-        let bc = time(sess.time_passes(),
-                      format!("inflate {}.bc", file).as_slice(),
-                      (),
-                      |_| {
-                          match flate::inflate_bytes(bc) {
-                              Some(bc) => bc,
-                              None => {
-                                  sess.fatal(format!("failed to decompress \
-                                                      bc of `{}`",
-                                                     name).as_slice())
-                              }
-                          }
-                      });
-        let ptr = bc.as_slice().as_ptr();
+        let bc_encoded = time(sess.time_passes(),
+                              format!("read {}.bytecode.deflate", name).as_slice(),
+                              (),
+                              |_| {
+                                  archive.read(format!("{}.bytecode.deflate",
+                                                       file).as_slice())
+                              });
+        let bc_encoded = bc_encoded.expect("missing compressed bytecode in archive!");
+        let bc_extractor = if is_versioned_bytecode_format(bc_encoded) {
+            |_| {
+                // Read the version
+                let version = extract_bytecode_format_version(bc_encoded);
+
+                if version == 1 {
+                    // The only version existing so far
+                    let data_size = extract_compressed_bytecode_size_v1(bc_encoded);
+                    let compressed_data = bc_encoded.slice(
+                        link::RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET,
+                        link::RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET + data_size as uint);
+
+                    match flate::inflate_bytes(compressed_data) {
+                        Some(inflated) => inflated,
+                        None => {
+                            sess.fatal(format!("failed to decompress bc of `{}`",
+                                               name).as_slice())
+                        }
+                    }
+                } else {
+                    sess.fatal(format!("Unsupported bytecode format version {}",
+                                       version).as_slice())
+                }
+            }
+        } else {
+            // the object must be in the old, pre-versioning format, so simply
+            // inflate everything and let LLVM decide if it can make sense of it
+            |_| {
+                match flate::inflate_bytes(bc_encoded) {
+                    Some(bc) => bc,
+                    None => {
+                        sess.fatal(format!("failed to decompress bc of `{}`",
+                                           name).as_slice())
+                    }
+                }
+            }
+        };
+
+        let bc_decoded = time(sess.time_passes(),
+                              format!("decode {}.bc", file).as_slice(),
+                              (),
+                              bc_extractor);
+
+        let ptr = bc_decoded.as_slice().as_ptr();
         debug!("linking {}", name);
         time(sess.time_passes(),
              format!("ll link {}", name).as_slice(),
@@ -86,7 +118,7 @@ pub fn run(sess: &session::Session, llmod: ModuleRef,
              |()| unsafe {
             if !llvm::LLVMRustLinkInExternalBitcode(llmod,
                                                     ptr as *const libc::c_char,
-                                                    bc.len() as libc::size_t) {
+                                                    bc_decoded.len() as libc::size_t) {
                 link::llvm_err(sess,
                                format!("failed to load bc of `{}`",
                                        name.as_slice()));
@@ -137,3 +169,28 @@ pub fn run(sess: &session::Session, llmod: ModuleRef,
     }
     debug!("lto done");
 }
+
+// True if `bc` is longer than the magic id and begins with it.
+fn is_versioned_bytecode_format(bc: &[u8]) -> bool {
+    let magic = link::RLIB_BYTECODE_OBJECT_MAGIC;
+    bc.len() > magic.len() && bc.slice(0, magic.len()) == magic
+}
+
+// Header field readers: each decodes one little-endian integer from `bc`.
+fn extract_bytecode_format_version(bc: &[u8]) -> u32 {
+    read_from_le_bytes::<u32>(bc, link::RLIB_BYTECODE_OBJECT_VERSION_OFFSET)
+}
+fn extract_compressed_bytecode_size_v1(bc: &[u8]) -> u64 {
+    read_from_le_bytes::<u64>(bc, link::RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET)
+}
+
+// Decodes a little-endian `T` at `position_in_bytes`. Header fields are not
+// aligned (the version is at offset 11), so the bytes are copied, not cast.
+fn read_from_le_bytes<T: Int>(bytes: &[u8], position_in_bytes: uint) -> T {
+    let src = bytes.slice(position_in_bytes, position_in_bytes + mem::size_of::<T>());
+    let mut data: T = unsafe { mem::zeroed() };
+    let dst = &mut data as *mut T as *mut u8;
+    unsafe { ::std::ptr::copy_nonoverlapping_memory(dst, src.as_ptr(), src.len()) };
+    Int::from_le(data)
+}
+