Skip to content

Commit 34930ec

Browse files
committed
core: Convert Char::escape_default, escape_unicode to iterators
[breaking-change]
1 parent a7c7273 commit 34930ec

File tree

7 files changed

+110
-43
lines changed

7 files changed

+110
-43
lines changed

src/libcollections/str.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,9 @@ pub trait StrAllocating: Str {
626626
let me = self.as_slice();
627627
let mut out = String::with_capacity(me.len());
628628
for c in me.chars() {
629-
c.escape_default(|c| out.push(c));
629+
for c in c.escape_default() {
630+
out.push(c);
631+
}
630632
}
631633
out
632634
}
@@ -636,7 +638,9 @@ pub trait StrAllocating: Str {
636638
let me = self.as_slice();
637639
let mut out = String::with_capacity(me.len());
638640
for c in me.chars() {
639-
c.escape_unicode(|c| out.push(c));
641+
for c in c.escape_unicode() {
642+
out.push(c);
643+
}
640644
}
641645
out
642646
}

src/libcore/char.rs

Lines changed: 95 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use mem::transmute;
1919
use option::{None, Option, Some};
20-
use iter::range_step;
20+
use iter::{range_step, Iterator, RangeStep};
2121
use slice::SlicePrelude;
2222

2323
// UTF-8 ranges and tags for encoding characters
@@ -165,7 +165,9 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
165165
///
166166
#[deprecated = "use the Char::escape_unicode method"]
167167
pub fn escape_unicode(c: char, f: |char|) {
168-
c.escape_unicode(f)
168+
for char in c.escape_unicode() {
169+
f(char);
170+
}
169171
}
170172

171173
///
@@ -182,7 +184,9 @@ pub fn escape_unicode(c: char, f: |char|) {
182184
///
183185
#[deprecated = "use the Char::escape_default method"]
184186
pub fn escape_default(c: char, f: |char|) {
185-
c.escape_default(f)
187+
for c in c.escape_default() {
188+
f(c);
189+
}
186190
}
187191

188192
/// Returns the amount of bytes this `char` would need if encoded in UTF-8
@@ -266,7 +270,7 @@ pub trait Char {
266270
/// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`.
267271
/// * Characters above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`.
268272
#[unstable = "pending error conventions, trait organization"]
269-
fn escape_unicode(self, f: |char|);
273+
fn escape_unicode(self) -> UnicodeEscapedChars;
270274

271275
/// Returns a 'default' ASCII and C++11-like literal escape of a
272276
/// character.
@@ -281,7 +285,7 @@ pub trait Char {
281285
/// * Any other chars in the range [0x20,0x7e] are not escaped.
282286
/// * Any other chars are given hex Unicode escapes; see `escape_unicode`.
283287
#[unstable = "pending error conventions, trait organization"]
284-
fn escape_default(self, f: |char|);
288+
fn escape_default(self) -> DefaultEscapedChars;
285289

286290
/// Returns the amount of bytes this character would need if encoded in
287291
/// UTF-8.
@@ -351,38 +355,23 @@ impl Char for char {
351355
fn from_u32(i: u32) -> Option<char> { from_u32(i) }
352356

353357
#[unstable = "pending error conventions, trait organization"]
354-
fn escape_unicode(self, f: |char|) {
355-
// avoid calling str::to_str_radix because we don't really need to allocate
356-
// here.
357-
f('\\');
358-
let pad = match () {
359-
_ if self <= '\xff' => { f('x'); 2 }
360-
_ if self <= '\uffff' => { f('u'); 4 }
361-
_ => { f('U'); 8 }
362-
};
363-
for offset in range_step::<i32>(4 * (pad - 1), -1, -4) {
364-
let offset = offset as uint;
365-
unsafe {
366-
match ((self as i32) >> offset) & 0xf {
367-
i @ 0 ... 9 => { f(transmute('0' as i32 + i)); }
368-
i => { f(transmute('a' as i32 + (i - 10))); }
369-
}
370-
}
371-
}
358+
fn escape_unicode(self) -> UnicodeEscapedChars {
359+
UnicodeEscapedChars { c: self, state: EscapeBackslash }
372360
}
373361

374362
#[unstable = "pending error conventions, trait organization"]
375-
fn escape_default(self, f: |char|) {
376-
match self {
377-
'\t' => { f('\\'); f('t'); }
378-
'\r' => { f('\\'); f('r'); }
379-
'\n' => { f('\\'); f('n'); }
380-
'\\' => { f('\\'); f('\\'); }
381-
'\'' => { f('\\'); f('\''); }
382-
'"' => { f('\\'); f('"'); }
383-
'\x20' ... '\x7e' => { f(self); }
384-
_ => self.escape_unicode(f),
385-
}
363+
fn escape_default(self) -> DefaultEscapedChars {
364+
let init_state = match self {
365+
'\t' => DefaultEscapeBackslash('t'),
366+
'\r' => DefaultEscapeBackslash('r'),
367+
'\n' => DefaultEscapeBackslash('n'),
368+
'\\' => DefaultEscapeBackslash('\\'),
369+
'\'' => DefaultEscapeBackslash('\''),
370+
'"' => DefaultEscapeBackslash('"'),
371+
'\x20' ... '\x7e' => DefaultEscapeChar(self),
372+
_ => DefaultEscapeUnicode(self.escape_unicode())
373+
};
374+
DefaultEscapedChars { state: init_state }
386375
}
387376

388377
#[inline]
@@ -456,3 +445,75 @@ impl Char for char {
456445
}
457446
}
458447
}
448+
449+
/// An iterator over the characters that represent a `char`, as escaped by
450+
/// Rust's unicode escaping rules.
///
/// Values of this type are returned by `Char::escape_unicode`. Each call to
/// `next` yields one `char` of the escape sequence: first a backslash, then
/// the escape type letter, then the hexadecimal digits.
451+
pub struct UnicodeEscapedChars {
452+
// The character whose escape sequence is being produced.
c: char,
453+
// Which part of the escape sequence `next` will emit on its next call.
state: UnicodeEscapedCharsState
454+
}
455+
456+
// Progress of a `UnicodeEscapedChars` iterator through its escape sequence.
enum UnicodeEscapedCharsState {
457+
// Nothing emitted yet; the next item is the leading backslash.
EscapeBackslash,
458+
// The backslash has been emitted; the next item is the type letter
// (`x`, `u` or `U`), chosen from the magnitude of the character.
EscapeType,
459+
// The type letter has been emitted; the wrapped range yields the bit offset
// of each remaining hex digit, most significant digit first.
EscapeValue(RangeStep<i32>),
460+
}
461+
462+
impl Iterator<char> for UnicodeEscapedChars {
463+
fn next(&mut self) -> Option<char> {
464+
match self.state {
465+
EscapeBackslash => {
466+
self.state = EscapeType;
467+
Some('\\')
468+
}
469+
EscapeType => {
470+
let (typechar, pad) = if self.c <= '\x7f' { ('x', 2) }
471+
else if self.c <= '\uffff' { ('u', 4) }
472+
else { ('U', 8) };
473+
self.state = EscapeValue(range_step(4 * (pad - 1), -1, -4i32));
474+
Some(typechar)
475+
}
476+
EscapeValue(ref mut range_step) => match range_step.next() {
477+
Some(offset) => {
478+
let offset = offset as uint;
479+
let v = match ((self.c as i32) >> offset) & 0xf {
480+
i @ 0 ... 9 => '0' as i32 + i,
481+
i => 'a' as i32 + (i - 10)
482+
};
483+
Some(unsafe { transmute(v) })
484+
}
485+
None => None
486+
}
487+
}
488+
}
489+
}
490+
491+
/// An iterator over the characters that represent a `char`, escaped
492+
/// for maximum portability.
///
/// Values of this type are returned by `Char::escape_default`. Depending on
/// the character, the iterator yields a two-character backslash escape, the
/// character itself, or the character's unicode escape sequence.
493+
pub struct DefaultEscapedChars {
494+
// What `next` will emit on its next call; see `DefaultEscapedCharsState`.
state: DefaultEscapedCharsState
495+
}
496+
497+
// Progress of a `DefaultEscapedChars` iterator through its output.
enum DefaultEscapedCharsState {
498+
// Emit a backslash next, then the wrapped character (used for `\t`, `\r`,
// `\n`, `\\`, `\'` and `\"`, where the wrapped char is the escape letter).
DefaultEscapeBackslash(char),
499+
// Emit the wrapped character next, then finish.
DefaultEscapeChar(char),
500+
// Everything has been emitted; `next` returns `None`.
DefaultEscapeDone,
501+
// Delegate every call to the wrapped unicode-escape iterator.
DefaultEscapeUnicode(UnicodeEscapedChars),
502+
}
503+
504+
impl Iterator<char> for DefaultEscapedChars {
505+
fn next(&mut self) -> Option<char> {
506+
match self.state {
507+
DefaultEscapeBackslash(c) => {
508+
self.state = DefaultEscapeChar(c);
509+
Some('\\')
510+
}
511+
DefaultEscapeChar(c) => {
512+
self.state = DefaultEscapeDone;
513+
Some(c)
514+
}
515+
DefaultEscapeDone => None,
516+
DefaultEscapeUnicode(ref mut iter) => iter.next()
517+
}
518+
}
519+
}

src/libgraphviz/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ impl<'a> LabelText<'a> {
420420
// not escaping \\, since Graphviz escString needs to
421421
// interpret backslashes; see EscStr above.
422422
'\\' => f(c),
423-
_ => c.escape_default(f)
423+
_ => for c in c.escape_default() { f(c) }
424424
}
425425
}
426426
fn escape_str(s: &str) -> String {

src/librustc/back/link.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ pub fn sanitize(s: &str) -> String {
262262

263263
_ => {
264264
let mut tstr = String::new();
265-
char::escape_unicode(c, |c| tstr.push(c));
265+
for c in c.escape_unicode() { tstr.push(c) }
266266
result.push('$');
267267
result.push_str(tstr.as_slice().slice_from(1));
268268
}

src/librustdoc/clean/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2030,9 +2030,9 @@ fn lit_to_string(lit: &ast::Lit) -> String {
20302030
ast::LitBinary(ref data) => format!("{}", data),
20312031
ast::LitByte(b) => {
20322032
let mut res = String::from_str("b'");
2033-
(b as char).escape_default(|c| {
2033+
for c in (b as char).escape_default() {
20342034
res.push(c);
2035-
});
2035+
}
20362036
res.push('\'');
20372037
res
20382038
},

src/libsyntax/parse/lexer/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ impl<'a> StringReader<'a> {
193193
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> ! {
194194
let mut m = m.to_string();
195195
m.push_str(": ");
196-
char::escape_default(c, |c| m.push(c));
196+
for c in c.escape_default() { m.push(c) }
197197
self.fatal_span_(from_pos, to_pos, m.as_slice());
198198
}
199199

@@ -202,7 +202,7 @@ impl<'a> StringReader<'a> {
202202
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
203203
let mut m = m.to_string();
204204
m.push_str(": ");
205-
char::escape_default(c, |c| m.push(c));
205+
for c in c.escape_default() { m.push(c) }
206206
self.err_span_(from_pos, to_pos, m.as_slice());
207207
}
208208

src/libsyntax/print/pprust.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2773,7 +2773,9 @@ impl<'a> State<'a> {
27732773
}
27742774
ast::LitChar(ch) => {
27752775
let mut res = String::from_str("'");
2776-
ch.escape_default(|c| res.push(c));
2776+
for c in ch.escape_default() {
2777+
res.push(c);
2778+
}
27772779
res.push('\'');
27782780
word(&mut self.s, res.as_slice())
27792781
}

0 commit comments

Comments
 (0)