Skip to content

Commit 276293a

Browse files
committed
Cleaned up case related functions a bit
1 parent ae3b869 commit 276293a

File tree

2 files changed

+35
-24
lines changed

2 files changed

+35
-24
lines changed

src/libcore/char.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
1+
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
22
// file at the top-level directory of this distribution and at
33
// http://rust-lang.org/COPYRIGHT.
44
//
@@ -234,6 +234,21 @@ pub fn escape_default(c: char) -> ~str {
234234
}
235235
}
236236

237+
/// Returns the number of bytes this character would need if encoded in UTF-8
238+
pub fn len_utf8_bytes(c: char) -> uint {
239+
static max_one_b: uint = 128u;
240+
static max_two_b: uint = 2048u;
241+
static max_three_b: uint = 65536u;
242+
static max_four_b: uint = 2097152u;
243+
244+
let code = c as uint;
245+
if code < max_one_b { 1u }
246+
else if code < max_two_b { 2u }
247+
else if code < max_three_b { 3u }
248+
else if code < max_four_b { 4u }
249+
else { fail!(~"invalid character!") }
250+
}
251+
237252
/**
238253
* Compare two chars
239254
*
@@ -334,7 +349,6 @@ fn test_escape_default() {
334349
assert_eq!(escape_default('\U0001d4b6'), ~"\\U0001d4b6");
335350
}
336351
337-
338352
#[test]
339353
fn test_escape_unicode() {
340354
assert_eq!(escape_unicode('\x00'), ~"\\x00");

src/libcore/str.rs

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
1+
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
22
// file at the top-level directory of this distribution and at
33
// http://rust-lang.org/COPYRIGHT.
44
//
@@ -789,16 +789,18 @@ pub fn each_split_within<'a>(ss: &'a str,
789789

790790
/// Convert a string to lowercase. ASCII only: fails if any character is not ASCII
791791
pub fn to_lower(s: &str) -> ~str {
792-
map(s,
793-
|c| unsafe{(libc::tolower(c as libc::c_char)) as char}
794-
)
792+
do map(s) |c| {
793+
assert!(char::is_ascii(c));
794+
(unsafe{libc::tolower(c as libc::c_char)}) as char
795+
}
795796
}
796797

797798
/// Convert a string to uppercase. ASCII only: fails if any character is not ASCII
798799
pub fn to_upper(s: &str) -> ~str {
799-
map(s,
800-
|c| unsafe{(libc::toupper(c as libc::c_char)) as char}
801-
)
800+
do map(s) |c| {
801+
assert!(char::is_ascii(c));
802+
(unsafe{libc::toupper(c as libc::c_char)}) as char
803+
}
802804
}
803805

804806
/**
@@ -3096,12 +3098,11 @@ mod tests {
30963098
30973099
#[test]
30983100
fn test_to_lower() {
3099-
unsafe {
3100-
assert!(~"" == map(~"",
3101-
|c| libc::tolower(c as c_char) as char));
3102-
assert!(~"ymca" == map(~"YMCA",
3103-
|c| libc::tolower(c as c_char) as char));
3104-
}
3101+
// libc::tolower, and hence str::to_lower
3102+
// are culturally insensitive: they only work for ASCII
3103+
// (see Issue #1347)
3104+
assert!(~"" == to_lower(""));
3105+
assert!(~"ymca" == to_lower("YMCA"));
31053106
}
31063107
31073108
#[test]
@@ -3666,12 +3667,8 @@ mod tests {
36663667

36673668
#[test]
36683669
fn test_map() {
3669-
unsafe {
3670-
assert!(~"" == map(~"", |c|
3671-
libc::toupper(c as c_char) as char));
3672-
assert!(~"YMCA" == map(~"ymca",
3673-
|c| libc::toupper(c as c_char) as char));
3674-
}
3670+
assert!(~"" == map(~"", |c| unsafe {libc::toupper(c as c_char)} as char));
3671+
assert!(~"YMCA" == map(~"ymca", |c| unsafe {libc::toupper(c as c_char)} as char));
36753672
}
36763673
36773674
#[test]
@@ -3685,11 +3682,11 @@ mod tests {
36853682
36863683
#[test]
36873684
fn test_any() {
3688-
assert!(false == any(~"", char::is_uppercase));
3685+
assert!(false == any(~"", char::is_uppercase));
36893686
assert!(false == any(~"ymca", char::is_uppercase));
36903687
assert!(true == any(~"YMCA", char::is_uppercase));
3691-
assert!(true == any(~"yMCA", char::is_uppercase));
3692-
assert!(true == any(~"Ymcy", char::is_uppercase));
3688+
assert!(true == any(~"yMCA", char::is_uppercase));
3689+
assert!(true == any(~"Ymcy", char::is_uppercase));
36933690
}
36943691
36953692
#[test]

0 commit comments

Comments
 (0)