From 0eb552a835500ed964836a8e0e3111843bff8aa1 Mon Sep 17 00:00:00 2001
From: Tobias Bucher <tobiasbucher5991@gmail.com>
Date: Thu, 23 Jul 2015 12:24:27 +0200
Subject: [PATCH 1/2] wtf8, char: Replace uses of `mem::transmute` with more
 specific functions

---
 src/libcore/char.rs           | 27 +++++++++++++++------------
 src/librustc_unicode/char.rs  |  2 +-
 src/libstd/sys/common/wtf8.rs | 31 +++++++++++++++++++------------
 3 files changed, 35 insertions(+), 25 deletions(-)
diff --git a/src/libcore/char.rs b/src/libcore/char.rs
index 88aa805668cfa..84a0ed5ab3f5e 100644
--- a/src/libcore/char.rs
+++ b/src/libcore/char.rs
@@ -84,10 +84,17 @@ pub fn from_u32(i: u32) -> Option<char> {
     if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
         None
     } else {
-        Some(unsafe { transmute(i) })
+        Some(unsafe { from_u32_unchecked(i) })
     }
 }
 
+/// Converts a `u32` to an `char`, not checking whether it is a valid unicode
+/// codepoint.
+#[inline]
+pub unsafe fn from_u32_unchecked(i: u32) -> char {
+    transmute(i)
+}
+
 /// Converts a number to the character representing it.
 ///
 /// # Return value
@@ -115,12 +122,11 @@ pub fn from_digit(num: u32, radix: u32) -> Option<char> {
         panic!("from_digit: radix is too high (maximum 36)");
     }
     if num < radix {
-        unsafe {
-            if num < 10 {
-                Some(transmute('0' as u32 + num))
-            } else {
-                Some(transmute('a' as u32 + num - 10))
-            }
+        let num = num as u8;
+        if num < 10 {
+            Some((b'0' + num) as char)
+        } else {
+            Some((b'a' + num - 10) as char)
         }
     } else {
         None
@@ -318,16 +324,13 @@ impl Iterator for EscapeUnicode {
                 Some('{')
             }
             EscapeUnicodeState::Value(offset) => {
-                let v = match ((self.c as i32) >> (offset * 4)) & 0xf {
-                    i @ 0 ... 9 => '0' as i32 + i,
-                    i => 'a' as i32 + (i - 10)
-                };
+                let c = from_digit(((self.c as u32) >> (offset * 4)) & 0xf, 16).unwrap();
                 if offset == 0 {
                     self.state = EscapeUnicodeState::RightBrace;
                 } else {
                     self.state = EscapeUnicodeState::Value(offset - 1);
                 }
-                Some(unsafe { transmute(v) })
+                Some(c)
             }
             EscapeUnicodeState::RightBrace => {
                 self.state = EscapeUnicodeState::Done;
diff --git a/src/librustc_unicode/char.rs b/src/librustc_unicode/char.rs
index 42c19ee6a204d..34b0ae18d4fe8 100644
--- a/src/librustc_unicode/char.rs
+++ b/src/librustc_unicode/char.rs
@@ -35,7 +35,7 @@ use core::iter::Iterator;
 use tables::{derived_property, property, general_category, conversions, charwidth};
 
 // stable reexports
-pub use core::char::{MAX, from_u32, from_digit, EscapeUnicode, EscapeDefault};
+pub use core::char::{MAX, from_u32, from_u32_unchecked, from_digit, EscapeUnicode, EscapeDefault};
 
 // unstable reexports
 #[allow(deprecated)]
diff --git a/src/libstd/sys/common/wtf8.rs b/src/libstd/sys/common/wtf8.rs
index 8ea673d2162d1..6f15d606724e5 100644
--- a/src/libstd/sys/common/wtf8.rs
+++ b/src/libstd/sys/common/wtf8.rs
@@ -32,17 +32,18 @@ use core::str::next_code_point;
 
 use ascii::*;
 use borrow::Cow;
+use char;
 use cmp;
 use fmt;
 use hash::{Hash, Hasher};
 use iter::FromIterator;
 use mem;
 use ops;
+use rustc_unicode::str::{Utf16Item, utf16_items};
 use slice;
 use str;
 use string::String;
 use sys_common::AsInner;
-use rustc_unicode::str::{Utf16Item, utf16_items};
 use vec::Vec;
 
 const UTF8_REPLACEMENT_CHARACTER: &'static [u8] = b"\xEF\xBF\xBD";
@@ -107,7 +108,7 @@ impl CodePoint {
     pub fn to_char(&self) -> Option<char> {
         match self.value {
             0xD800 ... 0xDFFF => None,
-            _ => Some(unsafe { mem::transmute(self.value) })
+            _ => Some(unsafe { char::from_u32_unchecked(self.value) })
         }
     }
 
@@ -213,18 +214,16 @@ impl Wtf8Buf {
             // Attempt to not use an intermediate buffer by just pushing bytes
             // directly onto this string.
             let slice = slice::from_raw_parts_mut(
-                self.bytes.as_mut_ptr().offset(cur_len as isize),
-                4
+                self.bytes.as_mut_ptr().offset(cur_len as isize), 4
             );
-            let used = encode_utf8_raw(code_point.value, mem::transmute(slice))
-                .unwrap_or(0);
+            let used = encode_utf8_raw(code_point.value, slice).unwrap();
             self.bytes.set_len(cur_len + used);
         }
     }
 
     #[inline]
     pub fn as_slice(&self) -> &Wtf8 {
-        unsafe { mem::transmute(&*self.bytes) }
+        unsafe { Wtf8::from_bytes_unchecked(&self.bytes) }
     }
 
     /// Reserves capacity for at least `additional` more bytes to be inserted
@@ -457,7 +456,16 @@ impl Wtf8 {
     /// Since WTF-8 is a superset of UTF-8, this always succeeds.
     #[inline]
     pub fn from_str(value: &str) -> &Wtf8 {
-        unsafe { mem::transmute(value.as_bytes()) }
+        unsafe { Wtf8::from_bytes_unchecked(value.as_bytes()) }
+    }
+
+    /// Creates a WTF-8 slice from a WTF-8 byte slice.
+    ///
+    /// Since the byte slice is not checked for valid WTF-8, this function is
+    /// marked unsafe.
+    #[inline]
+    unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 {
+        mem::transmute(value)
     }
 
     /// Returns the length, in WTF-8 bytes.
@@ -682,7 +690,7 @@ fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 {
 #[inline]
 fn decode_surrogate_pair(lead: u16, trail: u16) -> char {
     let code_point = 0x10000 + ((((lead - 0xD800) as u32) << 10) | (trail - 0xDC00) as u32);
-    unsafe { mem::transmute(code_point) }
+    unsafe { char::from_u32_unchecked(code_point) }
 }
 
 /// Copied from core::str::StrPrelude::is_char_boundary
@@ -699,7 +707,7 @@ pub fn is_code_point_boundary(slice: &Wtf8, index: usize) -> bool {
 #[inline]
 pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 {
     // memory layout of an &[u8] and &Wtf8 are the same
-    mem::transmute(slice::from_raw_parts(
+    Wtf8::from_bytes_unchecked(slice::from_raw_parts(
         s.bytes.as_ptr().offset(begin as isize),
         end - begin
     ))
@@ -821,7 +829,6 @@ mod tests {
     use prelude::v1::*;
     use borrow::Cow;
     use super::*;
-    use mem::transmute;
 
     #[test]
     fn code_point_from_u32() {
@@ -962,7 +969,7 @@ mod tests {
         string.push_wtf8(Wtf8::from_str(" 💩"));
         assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
 
-        fn w(value: &[u8]) -> &Wtf8 { unsafe { transmute(value) } }
+        fn w(v: &[u8]) -> &Wtf8 { unsafe { Wtf8::from_bytes_unchecked(v) } }
 
         let mut string = Wtf8Buf::new();
         string.push_wtf8(w(b"\xED\xA0\xBD"));  // lead

From c2fca7c95742cdd25198eae42d233d49db7026ea Mon Sep 17 00:00:00 2001
From: Tobias Bucher <tobiasbucher5991@gmail.com>
Date: Fri, 24 Jul 2015 00:45:21 +0200
Subject: [PATCH 2/2] Add unstable attribute to `char::from_u32_unchecked`

---
 src/libcore/char.rs | 1 +
 src/libstd/lib.rs   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/libcore/char.rs b/src/libcore/char.rs
index 84a0ed5ab3f5e..c6d0e97a0cd00 100644
--- a/src/libcore/char.rs
+++ b/src/libcore/char.rs
@@ -91,6 +91,7 @@ pub fn from_u32(i: u32) -> Option<char> {
 /// Converts a `u32` to an `char`, not checking whether it is a valid unicode
 /// codepoint.
 #[inline]
+#[unstable(feature = "char_from_unchecked", reason = "recently added API")]
 pub unsafe fn from_u32_unchecked(i: u32) -> char {
     transmute(i)
 }
diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs
index 82bc1314ad547..03e61247a835a 100644
--- a/src/libstd/lib.rs
+++ b/src/libstd/lib.rs
@@ -209,6 +209,7 @@
 #![feature(borrow_state)]
 #![feature(box_raw)]
 #![feature(box_syntax)]
+#![feature(char_from_unchecked)]
 #![feature(char_internals)]
 #![feature(clone_from_slice)]
 #![feature(collections)]