Skip to content

Commit 0327873

Browse files
authored
Merge pull request #22 from dylanowen/utf
Added support for unicode characters
2 parents db069e3 + a887947 commit 0327873

File tree

1 file changed

+35
-3
lines changed

1 file changed

+35
-3
lines changed

src/common.rs

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,19 +148,37 @@ fn unquote_string(s: &str) -> Result<String, Error<Token, Token>> {
148148
let mut res = String::with_capacity(s.len());
149149
debug_assert!(s.starts_with('"') && s.ends_with('"'));
150150
let mut chars = s[1..s.len()-1].chars();
151+
let mut temp_code_point = String::with_capacity(4);
151152
while let Some(c) = chars.next() {
152153
match c {
153154
'\\' => {
154-
match chars.next().expect("slash cant be and the end") {
155+
match chars.next().expect("slash cant be at the end") {
155156
c@'"' | c@'\\' | c@'/' => res.push(c),
156157
'b' => res.push('\u{0010}'),
157158
'f' => res.push('\u{000C}'),
158159
'n' => res.push('\n'),
159160
'r' => res.push('\r'),
160161
't' => res.push('\t'),
161162
'u' => {
162-
unimplemented!();
163-
}
163+
temp_code_point.clear();
164+
for _ in 0..4 {
165+
match chars.next() {
166+
Some(inner_c) => temp_code_point.push(inner_c),
167+
None => return Err(Error::unexpected_message(
168+
format_args!("\\u must have 4 characters after it, only found '{}'", temp_code_point)
169+
)),
170+
}
171+
}
172+
173+
// convert our hex string into a u32, then convert that into a char
174+
match u32::from_str_radix(&temp_code_point, 16).map(std::char::from_u32) {
175+
Ok(Some(unicode_char)) => res.push(unicode_char),
176+
_ => {
177+
return Err(Error::unexpected_message(
178+
format_args!("{} is not a valid unicode code point", temp_code_point)))
179+
}
180+
}
181+
},
164182
c => {
165183
return Err(Error::unexpected_message(
166184
format_args!("bad escaped char {:?}", c)));
@@ -263,6 +281,7 @@ pub fn parse_type<'a>(input: &mut TokenStream<'a>)
263281
#[cfg(test)]
264282
mod tests {
265283
use super::Number;
284+
use super::unquote_string;
266285

267286
#[test]
268287
fn number_from_i32_and_to_i64_conversion() {
@@ -271,4 +290,17 @@ mod tests {
271290
assert_eq!(Number::from(i32::min_value()).as_i64(), Some(i32::min_value() as i64));
272291
assert_eq!(Number::from(i32::max_value()).as_i64(), Some(i32::max_value() as i64));
273292
}
293+
294+
#[test]
fn unquote_unicode_string() {
    // Each entry pairs a quoted source literal containing `\uXXXX` escapes
    // with the text that unquoting it is expected to produce.
    let cases = [
        // a single escape on its own
        (r#""\u0009""#, "\u{0009}"),
        (r#""\u000A""#, "\u{000A}"),
        (r#""\u000D""#, "\u{000D}"),
        (r#""\u0020""#, "\u{0020}"),
        (r#""\uFFFF""#, "\u{FFFF}"),
        // escapes interleaved with ordinary characters
        (
            r#""\u0009 hello \u000A there""#,
            "\u{0009} hello \u{000A} there",
        ),
    ];

    for &(quoted, expected) in cases.iter() {
        assert_eq!(unquote_string(quoted).expect(""), expected);
    }
}
274306
}

0 commit comments

Comments
 (0)