From 0eb552a835500ed964836a8e0e3111843bff8aa1 Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Thu, 23 Jul 2015 12:24:27 +0200 Subject: [PATCH 1/2] wtf8, char: Replace uses of `mem::transmute` with more specific functions --- src/libcore/char.rs | 27 +++++++++++++++------------ src/librustc_unicode/char.rs | 2 +- src/libstd/sys/common/wtf8.rs | 31 +++++++++++++++++++------------ 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 88aa805668cfa..84a0ed5ab3f5e 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -84,10 +84,17 @@ pub fn from_u32(i: u32) -> Option { if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { None } else { - Some(unsafe { transmute(i) }) + Some(unsafe { from_u32_unchecked(i) }) } } +/// Converts a `u32` to an `char`, not checking whether it is a valid unicode +/// codepoint. +#[inline] +pub unsafe fn from_u32_unchecked(i: u32) -> char { + transmute(i) +} + /// Converts a number to the character representing it. /// /// # Return value @@ -115,12 +122,11 @@ pub fn from_digit(num: u32, radix: u32) -> Option { panic!("from_digit: radix is too high (maximum 36)"); } if num < radix { - unsafe { - if num < 10 { - Some(transmute('0' as u32 + num)) - } else { - Some(transmute('a' as u32 + num - 10)) - } + let num = num as u8; + if num < 10 { + Some((b'0' + num) as char) + } else { + Some((b'a' + num - 10) as char) } } else { None @@ -318,16 +324,13 @@ impl Iterator for EscapeUnicode { Some('{') } EscapeUnicodeState::Value(offset) => { - let v = match ((self.c as i32) >> (offset * 4)) & 0xf { - i @ 0 ... 
9 => '0' as i32 + i, - i => 'a' as i32 + (i - 10) - }; + let c = from_digit(((self.c as u32) >> (offset * 4)) & 0xf, 16).unwrap(); if offset == 0 { self.state = EscapeUnicodeState::RightBrace; } else { self.state = EscapeUnicodeState::Value(offset - 1); } - Some(unsafe { transmute(v) }) + Some(c) } EscapeUnicodeState::RightBrace => { self.state = EscapeUnicodeState::Done; diff --git a/src/librustc_unicode/char.rs b/src/librustc_unicode/char.rs index 42c19ee6a204d..34b0ae18d4fe8 100644 --- a/src/librustc_unicode/char.rs +++ b/src/librustc_unicode/char.rs @@ -35,7 +35,7 @@ use core::iter::Iterator; use tables::{derived_property, property, general_category, conversions, charwidth}; // stable reexports -pub use core::char::{MAX, from_u32, from_digit, EscapeUnicode, EscapeDefault}; +pub use core::char::{MAX, from_u32, from_u32_unchecked, from_digit, EscapeUnicode, EscapeDefault}; // unstable reexports #[allow(deprecated)] diff --git a/src/libstd/sys/common/wtf8.rs b/src/libstd/sys/common/wtf8.rs index 8ea673d2162d1..6f15d606724e5 100644 --- a/src/libstd/sys/common/wtf8.rs +++ b/src/libstd/sys/common/wtf8.rs @@ -32,17 +32,18 @@ use core::str::next_code_point; use ascii::*; use borrow::Cow; +use char; use cmp; use fmt; use hash::{Hash, Hasher}; use iter::FromIterator; use mem; use ops; +use rustc_unicode::str::{Utf16Item, utf16_items}; use slice; use str; use string::String; use sys_common::AsInner; -use rustc_unicode::str::{Utf16Item, utf16_items}; use vec::Vec; const UTF8_REPLACEMENT_CHARACTER: &'static [u8] = b"\xEF\xBF\xBD"; @@ -107,7 +108,7 @@ impl CodePoint { pub fn to_char(&self) -> Option { match self.value { 0xD800 ... 0xDFFF => None, - _ => Some(unsafe { mem::transmute(self.value) }) + _ => Some(unsafe { char::from_u32_unchecked(self.value) }) } } @@ -213,18 +214,16 @@ impl Wtf8Buf { // Attempt to not use an intermediate buffer by just pushing bytes // directly onto this string. 
let slice = slice::from_raw_parts_mut( - self.bytes.as_mut_ptr().offset(cur_len as isize), - 4 + self.bytes.as_mut_ptr().offset(cur_len as isize), 4 ); - let used = encode_utf8_raw(code_point.value, mem::transmute(slice)) - .unwrap_or(0); + let used = encode_utf8_raw(code_point.value, slice).unwrap(); self.bytes.set_len(cur_len + used); } } #[inline] pub fn as_slice(&self) -> &Wtf8 { - unsafe { mem::transmute(&*self.bytes) } + unsafe { Wtf8::from_bytes_unchecked(&self.bytes) } } /// Reserves capacity for at least `additional` more bytes to be inserted @@ -457,7 +456,16 @@ impl Wtf8 { /// Since WTF-8 is a superset of UTF-8, this always succeeds. #[inline] pub fn from_str(value: &str) -> &Wtf8 { - unsafe { mem::transmute(value.as_bytes()) } + unsafe { Wtf8::from_bytes_unchecked(value.as_bytes()) } + } + + /// Creates a WTF-8 slice from a WTF-8 byte slice. + /// + /// Since the byte slice is not checked for valid WTF-8, this function is + /// marked unsafe. + #[inline] + unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 { + mem::transmute(value) } /// Returns the length, in WTF-8 bytes. 
@@ -682,7 +690,7 @@ fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 { #[inline] fn decode_surrogate_pair(lead: u16, trail: u16) -> char { let code_point = 0x10000 + ((((lead - 0xD800) as u32) << 10) | (trail - 0xDC00) as u32); - unsafe { mem::transmute(code_point) } + unsafe { char::from_u32_unchecked(code_point) } } /// Copied from core::str::StrPrelude::is_char_boundary @@ -699,7 +707,7 @@ pub fn is_code_point_boundary(slice: &Wtf8, index: usize) -> bool { #[inline] pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 { // memory layout of an &[u8] and &Wtf8 are the same - mem::transmute(slice::from_raw_parts( + Wtf8::from_bytes_unchecked(slice::from_raw_parts( s.bytes.as_ptr().offset(begin as isize), end - begin )) @@ -821,7 +829,6 @@ mod tests { use prelude::v1::*; use borrow::Cow; use super::*; - use mem::transmute; #[test] fn code_point_from_u32() { @@ -962,7 +969,7 @@ mod tests { string.push_wtf8(Wtf8::from_str(" 💩")); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); - fn w(value: &[u8]) -> &Wtf8 { unsafe { transmute(value) } } + fn w(v: &[u8]) -> &Wtf8 { unsafe { Wtf8::from_bytes_unchecked(v) } } let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\xA0\xBD")); // lead From c2fca7c95742cdd25198eae42d233d49db7026ea Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Fri, 24 Jul 2015 00:45:21 +0200 Subject: [PATCH 2/2] Add unstable attribute to `char::from_u32_unchecked` --- src/libcore/char.rs | 1 + src/libstd/lib.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 84a0ed5ab3f5e..c6d0e97a0cd00 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -91,6 +91,7 @@ pub fn from_u32(i: u32) -> Option { /// Converts a `u32` to an `char`, not checking whether it is a valid unicode /// codepoint. 
#[inline] +#[unstable(feature = "char_from_unchecked", reason = "recently added API")] pub unsafe fn from_u32_unchecked(i: u32) -> char { transmute(i) } diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 82bc1314ad547..03e61247a835a 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -209,6 +209,7 @@ #![feature(borrow_state)] #![feature(box_raw)] #![feature(box_syntax)] +#![feature(char_from_unchecked)] #![feature(char_internals)] #![feature(clone_from_slice)] #![feature(collections)]