17
17
18
18
use mem:: transmute;
19
19
use option:: { None , Option , Some } ;
20
- use iter:: range_step;
20
+ use iter:: { range_step, Iterator , RangeStep } ;
21
21
use slice:: SlicePrelude ;
22
22
23
23
// UTF-8 ranges and tags for encoding characters
@@ -165,7 +165,9 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
165
165
///
166
166
// Deprecated callback-style wrapper. The added body drives the iterator
// returned by the `escape_unicode` method and passes each yielded `char` to
// `f` in order; the removed line handed `f` to the method directly.
#[ deprecated = "use the Char::escape_unicode method" ]
167
167
pub fn escape_unicode ( c : char , f: |char|) {
168
- c. escape_unicode ( f)
168
+ for char in c. escape_unicode ( ) {
169
+ f ( char) ;
170
+ }
169
171
}
170
172
171
173
///
@@ -182,7 +184,9 @@ pub fn escape_unicode(c: char, f: |char|) {
182
184
///
183
185
// Deprecated callback-style wrapper. The added body drives the iterator
// returned by the `escape_default` method and passes each yielded `char` to
// `f` in order; the removed line handed `f` to the method directly.
#[ deprecated = "use the Char::escape_default method" ]
184
186
pub fn escape_default ( c : char , f: |char|) {
185
- c. escape_default ( f)
187
// The loop binding `c` shadows the parameter `c`; the iterator expression on
// the right is built from the parameter before the shadowing applies.
+ for c in c. escape_default ( ) {
188
+ f ( c) ;
189
+ }
186
190
}
187
191
188
192
/// Returns the amount of bytes this `char` would need if encoded in UTF-8
@@ -266,7 +270,7 @@ pub trait Char {
266
270
/// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`.
267
271
/// * Characters above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`.
268
272
// Signature change in this hunk: the `|char|` callback parameter is dropped
// and the method now returns a `UnicodeEscapedChars` value, which implements
// `Iterator<char>` further down in this file.
#[ unstable = "pending error conventions, trait organization" ]
269
- fn escape_unicode ( self , f : |char| ) ;
273
+ fn escape_unicode ( self ) -> UnicodeEscapedChars ;
270
274
271
275
/// Returns a 'default' ASCII and C++11-like literal escape of a
272
276
/// character.
@@ -281,7 +285,7 @@ pub trait Char {
281
285
/// * Any other chars in the range [0x20,0x7e] are not escaped.
282
286
/// * Any other chars are given hex Unicode escapes; see `escape_unicode`.
283
287
// Signature change in this hunk: the `|char|` callback parameter is dropped
// and the method now returns a `DefaultEscapedChars` value, which implements
// `Iterator<char>` further down in this file.
#[ unstable = "pending error conventions, trait organization" ]
284
- fn escape_default ( self , f : |char| ) ;
288
+ fn escape_default ( self ) -> DefaultEscapedChars ;
285
289
286
290
/// Returns the amount of bytes this character would need if encoded in
287
291
/// UTF-8.
@@ -351,38 +355,23 @@ impl Char for char {
351
355
// Forwards to a `from_u32` function resolved from the surrounding scope; a
// bare call inside a method body cannot name this trait method itself.
fn from_u32 ( i : u32 ) -> Option < char > { from_u32 ( i) }
352
356
353
357
// Implementation switch: the removed body pushed every character of the
// escape into `f` eagerly (backslash, type letter, then hex digits). The added
// body only constructs the iterator in its initial `EscapeBackslash` state;
// the characters are produced later by `UnicodeEscapedChars::next`.
#[ unstable = "pending error conventions, trait organization" ]
354
- fn escape_unicode ( self , f: |char|) {
355
- // avoid calling str::to_str_radix because we don't really need to allocate
356
- // here.
357
- f ( '\\' ) ;
358
- let pad = match ( ) {
359
- _ if self <= '\xff' => { f ( 'x' ) ; 2 }
360
- _ if self <= '\uffff' => { f ( 'u' ) ; 4 }
361
- _ => { f ( 'U' ) ; 8 }
362
- } ;
363
- for offset in range_step :: < i32 > ( 4 * ( pad - 1 ) , -1 , -4 ) {
364
- let offset = offset as uint ;
365
- unsafe {
366
- match ( ( self as i32 ) >> offset) & 0xf {
367
- i @ 0 ... 9 => { f ( transmute ( '0' as i32 + i) ) ; }
368
- i => { f ( transmute ( 'a' as i32 + ( i - 10 ) ) ) ; }
369
- }
370
- }
371
- }
358
+ fn escape_unicode ( self ) -> UnicodeEscapedChars {
359
+ UnicodeEscapedChars { c : self , state : EscapeBackslash }
372
360
}
373
361
374
362
// Implementation switch: the removed body emitted the escape through `f`
// directly. The added body emits nothing itself; it only chooses the state the
// returned iterator starts in, using the same match arms as before.
#[ unstable = "pending error conventions, trait organization" ]
375
- fn escape_default ( self , f: |char|) {
376
- match self {
377
- '\t' => { f ( '\\' ) ; f ( 't' ) ; }
378
- '\r' => { f ( '\\' ) ; f ( 'r' ) ; }
379
- '\n' => { f ( '\\' ) ; f ( 'n' ) ; }
380
- '\\' => { f ( '\\' ) ; f ( '\\' ) ; }
381
- '\'' => { f ( '\\' ) ; f ( '\'' ) ; }
382
- '"' => { f ( '\\' ) ; f ( '"' ) ; }
383
- '\x20' ... '\x7e' => { f ( self ) ; }
384
- _ => self . escape_unicode ( f) ,
385
- }
363
+ fn escape_default ( self ) -> DefaultEscapedChars {
364
+ let init_state = match self {
365
// Two-character escapes: the payload is the character that follows the
// backslash in the output.
+ '\t' => DefaultEscapeBackslash ( 't' ) ,
366
+ '\r' => DefaultEscapeBackslash ( 'r' ) ,
367
+ '\n' => DefaultEscapeBackslash ( 'n' ) ,
368
+ '\\' => DefaultEscapeBackslash ( '\\' ) ,
369
+ '\'' => DefaultEscapeBackslash ( '\'' ) ,
370
+ '"' => DefaultEscapeBackslash ( '"' ) ,
371
// Remaining characters in '\x20' ... '\x7e' are yielded unchanged.
+ '\x20' ... '\x7e' => DefaultEscapeChar ( self ) ,
372
// Everything else is delegated to the `escape_unicode` iterator.
+ _ => DefaultEscapeUnicode ( self . escape_unicode ( ) )
373
+ } ;
374
+ DefaultEscapedChars { state : init_state }
386
375
}
387
376
388
377
#[ inline]
@@ -456,3 +445,75 @@ impl Char for char {
456
445
}
457
446
}
458
447
}
448
+
449
+ /// An iterator over the characters that represent a `char`, as escaped by
450
+ /// Rust's unicode escaping rules.
451
+ pub struct UnicodeEscapedChars {
452
// The character being escaped.
+ c : char ,
453
// Which part of the escape the next call to `next` will produce.
+ state : UnicodeEscapedCharsState
454
+ }
455
+
456
// Progress marker for `UnicodeEscapedChars`; the states are visited in the
// order listed.
+ enum UnicodeEscapedCharsState {
457
// Nothing emitted yet; the next character is the leading backslash.
+ EscapeBackslash ,
458
// Backslash emitted; the next character is the type letter (`x`, `u` or `U`).
+ EscapeType ,
459
// Type letter emitted; the range yields the bit offsets of the hex digits
// still to be produced.
+ EscapeValue ( RangeStep < i32 > ) ,
460
+ }
461
+
462
// State machine behind `escape_unicode`: each call to `next` yields one
// character of the escape — the backslash, then the type letter, then the hex
// digits starting from the highest bit offset.
+ impl Iterator < char > for UnicodeEscapedChars {
463
+ fn next ( & mut self ) -> Option < char > {
464
+ match self . state {
465
+ EscapeBackslash => {
466
+ self . state = EscapeType ;
467
+ Some ( '\\' )
468
+ }
469
+ EscapeType => {
470
// Choose the type letter and digit count from the code point: `x` with 2
// digits up to '\x7f', `u` with 4 up to '\uffff', otherwise `U` with 8.
// NOTE(review): the removed callback version compared against '\xff' here,
// and the trait docs still describe 4-digit escapes as covering
// [0x100,0xffff]. With '\x7f', characters 0x80..=0xff now take the `u` form —
// confirm this is intended and bring the docs in line.
+ let ( typechar, pad) = if self . c <= '\x7f' { ( 'x' , 2 ) }
471
+ else if self . c <= '\uffff' { ( 'u' , 4 ) }
472
+ else { ( 'U' , 8 ) } ;
473
// One shift amount per hex digit, stepping down by 4 bits from
// 4 * (pad - 1); presumably the -1 bound is exclusive, so the last shift
// yielded is 0 — verify against `range_step`.
+ self . state = EscapeValue ( range_step ( 4 * ( pad - 1 ) , -1 , -4i32 ) ) ;
474
+ Some ( typechar)
475
+ }
476
+ EscapeValue ( ref mut range_step) => match range_step. next ( ) {
477
+ Some ( offset) => {
478
+ let offset = offset as uint ;
479
// Masking with 0xf leaves a value in 0..=15: 0-9 map onto '0'..'9', the
// remaining values onto 'a'..'f'.
+ let v = match ( ( self . c as i32 ) >> offset) & 0xf {
480
+ i @ 0 ... 9 => '0' as i32 + i,
481
+ i => 'a' as i32 + ( i - 10 )
482
+ } ;
483
// `v` is therefore always the code of an ASCII hex digit, which is what the
// i32 -> char transmute relies on.
+ Some ( unsafe { transmute ( v) } )
484
+ }
485
// No offsets left: every digit has been produced.
+ None => None
486
+ }
487
+ }
488
+ }
489
+ }
490
+
491
+ /// An iterator over the characters that represent a `char`, escaped
492
+ /// for maximum portability.
493
+ pub struct DefaultEscapedChars {
494
// Current position in the escape; the initial value is chosen by
// `escape_default` from the character being escaped.
+ state : DefaultEscapedCharsState
495
+ }
496
+
497
// Progress marker for `DefaultEscapedChars`.
+ enum DefaultEscapedCharsState {
498
// A backslash is due next; the payload is the character to emit after it.
+ DefaultEscapeBackslash ( char ) ,
499
// The payload character is due next, after which iteration is finished.
+ DefaultEscapeChar ( char ) ,
500
// Nothing left to emit.
+ DefaultEscapeDone ,
501
// All output comes from the wrapped `escape_unicode` iterator.
+ DefaultEscapeUnicode ( UnicodeEscapedChars ) ,
502
+ }
503
+
504
// State machine behind `escape_default`: each call to `next` yields one
// character and advances `self.state`.
+ impl Iterator < char > for DefaultEscapedChars {
505
+ fn next ( & mut self ) -> Option < char > {
506
+ match self . state {
507
// Emit the backslash now and keep `c` for the following call.
+ DefaultEscapeBackslash ( c) => {
508
+ self . state = DefaultEscapeChar ( c) ;
509
+ Some ( '\\' )
510
+ }
511
// Emit the stored character; the escape is complete afterwards.
+ DefaultEscapeChar ( c) => {
512
+ self . state = DefaultEscapeDone ;
513
+ Some ( c)
514
+ }
515
+ DefaultEscapeDone => None ,
516
// Forward to the inner unicode-escape iterator, which tracks its own state.
+ DefaultEscapeUnicode ( ref mut iter) => iter. next ( )
517
+ }
518
+ }
519
+ }
0 commit comments