diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index aaa7da312f29a..a8bfc29151f33 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -74,7 +74,8 @@ use vec::Vec; pub use core::str::{from_utf8, CharEq, Chars, CharOffsets}; pub use core::str::{Bytes, CharSplits}; pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits}; -pub use core::str::{Utf16CodeUnits, eq_slice, is_utf8, is_utf16, Utf16Items}; +pub use core::str::{Utf16Encoder, Utf16CodeUnits}; +pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items}; pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items}; pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange}; pub use core::str::{FromStr, from_str}; diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 24f26b15f27ac..68e490ecb19c4 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -762,11 +762,33 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> { /// Use with the `std::iter` module. #[deriving(Clone)] pub struct Utf16CodeUnits<'a> { - chars: Chars<'a>, - extra: u16 + encoder: Utf16Encoder<Chars<'a>> } impl<'a> Iterator<u16> for Utf16CodeUnits<'a> { + #[inline] + fn next(&mut self) -> Option<u16> { self.encoder.next() } + + #[inline] + fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() } +} + + +/// Iterator adaptor for encoding `char`s to UTF-16. +#[deriving(Clone)] +pub struct Utf16Encoder<I> { + chars: I, + extra: u16 +} + +impl<I> Utf16Encoder<I> { + /// Create a UTF-16 encoder from any `char` iterator. 
+ pub fn new(chars: I) -> Utf16Encoder<I> where I: Iterator<char> { + Utf16Encoder { chars: chars, extra: 0 } + } +} + +impl<I> Iterator<u16> for Utf16Encoder<I> where I: Iterator<char> { #[inline] fn next(&mut self) -> Option<u16> { if self.extra != 0 { @@ -2225,7 +2247,7 @@ impl StrPrelude for str { #[inline] fn utf16_units(&self) -> Utf16CodeUnits { - Utf16CodeUnits{ chars: self.chars(), extra: 0} + Utf16CodeUnits { encoder: Utf16Encoder::new(self.chars()) } } #[inline] diff --git a/src/libcoretest/str.rs b/src/libcoretest/str.rs index 5f44fd807ccae..9574aeb3762de 100644 --- a/src/libcoretest/str.rs +++ b/src/libcoretest/str.rs @@ -114,3 +114,10 @@ fn test_rev_split_char_iterator_no_trailing() { split.reverse(); assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]); } + +#[test] +fn test_utf16_code_units() { + use core::str::Utf16Encoder; + assert_eq!(Utf16Encoder::new(vec!['é', '\U0001F4A9'].into_iter()).collect::<Vec<_>>(), + vec![0xE9, 0xD83D, 0xDCA9]) +}