From b844449327b886c684f29a06594f4cd07b289cb1 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Mon, 23 Jan 2012 00:36:58 -0800 Subject: [PATCH 1/8] Added str::map and str::all functions --- src/libcore/str.rs | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index b24c2158d4118..3ec1bf066cba8 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1113,6 +1113,33 @@ fn escape(s: str) -> str { r } +/* +Function: all + +Return true if a predicate matches all characters + +If the string contains no characters +*/ +fn all(ss: str, ff: fn&(char) -> bool) -> bool { + str::loop_chars(ss, ff) +} + +/* +Function: map + +Apply a function to each character +*/ +fn map(ss: str, ff: fn&(char) -> char) -> str { + let result = ""; + + str::iter_chars(ss, {|cc| + str::push_char(result, ff(cc)); + }); + + ret result; +} + + #[cfg(test)] mod tests { @@ -1572,4 +1599,19 @@ mod tests { assert(escape("abc\ndef") == "abc\\ndef"); assert(escape("abc\"def") == "abc\\\"def"); } -} \ No newline at end of file + + #[test] + fn test_map () { + assert "" == map("", char::to_upper); + assert "YMCA" == map("ymca", char::to_upper); + } + + #[test] + fn test_all () { + assert true == all("", char::is_uppercase); + assert false == all("ymca", char::is_uppercase); + assert true == all("YMCA", char::is_uppercase); + assert false == all("yMCA", char::is_uppercase); + assert false == all("YMCy", char::is_uppercase); + } +} From 94259605b98a456d45e1a739a78929b4fc1dca96 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Mon, 23 Jan 2012 02:41:40 -0800 Subject: [PATCH 2/8] Added str::any, str::bytes_iter, str::windowed, and vec::windowed functions --- src/libcore/str.rs | 132 ++++++++++++++++++++++++++++++++++++++++++--- src/libcore/vec.rs | 33 ++++++++++++ 2 files changed, 158 insertions(+), 7 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 3ec1bf066cba8..ad77137829981 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -14,8 +14,8 @@ export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len, char_at, bytes, is_ascii, shift_byte, pop_byte, unsafe_from_byte, unsafe_from_bytes, from_char, char_range_at, from_cstr, sbuf, as_buf, push_byte, utf8_char_width, safe_slice, - contains, iter_chars, loop_chars, loop_chars_sub, - escape; + contains, iter_chars, chars_iter, bytes_iter, + loop_chars, loop_chars_sub, escape, any, all, map, windowed; #[abi = "cdecl"] native mod rustrt { @@ -346,7 +346,6 @@ Function: iter_chars Iterate over the characters in a string */ - fn iter_chars(s: str, it: block(char)) { let pos = 0u, len = byte_len(s); while (pos < len) { @@ -356,6 +355,34 @@ fn iter_chars(s: str, it: block(char)) { } } +/* +Function: chars_iter + +Iterate over the characters in a string + +FIXME: A synonym to iter_chars +*/ +fn chars_iter(ss: str, it: fn&(char)) { + iter_chars(ss, it) +} + +/* +Function: bytes_iter + +Iterate over the bytes in a string + +FIXME: Should it really include the last byte '\0'? +*/ +fn bytes_iter(ss: str, it: fn&(u8)) { + let pos = 0u; + let len = byte_len(ss); + + while (pos < len) { + it(ss[pos]); + pos += 1u; + } +} + /* Function: loop_chars @@ -1116,14 +1143,25 @@ fn escape(s: str) -> str { /* Function: all -Return true if a predicate matches all characters +Return true if a predicate matches all characters or +if the string contains no characters -If the string contains no characters +// FIXME: a synonym to loop_chars */ fn all(ss: str, ff: fn&(char) -> bool) -> bool { str::loop_chars(ss, ff) } +/* +Function: any + +Return true if a predicate matches any character +(and false if it matches none or there are no characters) +*/ +fn any(ss: str, pred: fn&(char) -> bool) -> bool { + !all(ss, {|cc| !pred(cc)}) +} + /* Function: map @@ -1139,6 +1177,26 @@ fn map(ss: str, ff: fn&(char) -> char) -> str { ret result; } +/* +Function: windowed + +Create a vector of substrings of size `nn` +*/ +fn windowed(nn: uint, ss: str) -> [str] { + let ww = []; + let len = str::char_len(ss); + + assert 1u <= nn; + + let ii = 0u; + while ii+nn <= len { + let w = char_slice( ss, ii, ii+nn ); + vec::push(ww,w); + ii += 1u; + } + + ret ww; +} #[cfg(test)] mod tests { @@ -1590,6 +1648,39 @@ mod tests { } i += 1; } + + iter_chars("") {|ch| fail; } // should not fail + } + + #[test] + fn test_chars_iter() { + let i = 0; + chars_iter("x\u03c0y") {|ch| + alt i { + 0 { assert ch == 'x'; } + 1 { assert ch == '\u03c0'; } + 2 { assert ch == 'y'; } + } + i += 1; + } + + chars_iter("") {|_ch| fail; } // should not fail + } + + #[test] + fn test_bytes_iter() { + let i = 0; + + bytes_iter("xyz") {|bb| + alt i { + 0 { assert bb == 'x' as u8; } + 1 { assert bb == 'y' as u8; } + 2 { assert bb == 'z' as u8; } + } + i += 1; + } + + bytes_iter("") {|bb| assert bb == 0u8; } } #[test] @@ -1601,17 +1692,44 @@ mod tests { } #[test] - fn test_map () { + fn test_map() { assert "" == map("", char::to_upper); assert "YMCA" == map("ymca", char::to_upper); } #[test] - fn test_all () { + fn test_all() { assert true == all("", char::is_uppercase); assert false == all("ymca", char::is_uppercase); assert true == all("YMCA", char::is_uppercase); assert false == all("yMCA", char::is_uppercase); assert false == all("YMCy", char::is_uppercase); } + + #[test] + fn test_any() { + assert false == any("", char::is_uppercase); + assert false == any("ymca", char::is_uppercase); + assert true == any("YMCA", char::is_uppercase); + assert true == any("yMCA", char::is_uppercase); + assert true == any("YMCy", char::is_uppercase); + } + + #[test] + fn test_windowed() { + let data = "ประเทศไทย中"; + + assert ["ประ", "ระเ", "ะเท", "เทศ", "ทศไ", "ศไท", "ไทย", "ทย中"] + == windowed(3u, data); + + assert [data] == windowed(10u, data); + + assert [] == windowed(6u, "abcd"); + } + + #[test] + #[should_fail] + fn test_windowed_() { + let _x = windowed(0u, "abcd"); + } } diff --git a/src/libcore/vec.rs b/src/libcore/vec.rs index f677af1efc7f5..0d3274ccfec37 100644 --- a/src/libcore/vec.rs +++ b/src/libcore/vec.rs @@ -829,6 +829,23 @@ fn permute(v: [const T], put: block([T])) { } } +fn windowed (nn: uint, xx: [TT]) -> [[TT]] { + let ww = []; + + assert 1u <= nn; + + vec::iteri (xx, {|ii, _x| + let len = vec::len(xx); + + if ii+nn <= len { + let w = vec::slice ( xx, ii, ii+nn ); + vec::push (ww, w); + } + }); + + ret ww; +} + /* Function: to_ptr @@ -1497,6 +1514,22 @@ mod tests { assert concat([[1], [2,3]]) == [1, 2, 3]; } + #[test] + fn test_windowed () { + assert [[1u,2u,3u],[2u,3u,4u],[3u,4u,5u],[4u,5u,6u]] + == windowed (3u, [1u,2u,3u,4u,5u,6u]); + + assert [[1u,2u,3u,4u],[2u,3u,4u,5u],[3u,4u,5u,6u]] + == windowed (4u, [1u,2u,3u,4u,5u,6u]); + + assert [] == windowed (7u, [1u,2u,3u,4u,5u,6u]); + } + + #[test] + #[should_fail] + fn test_windowed_() { + let _x = windowed (0u, [1u,2u,3u,4u,5u,6u]); + } } // Local Variables: From 065d78848021d91f2e7cece2bada805e37b6f4eb Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Mon, 23 Jan 2012 03:02:44 -0800 Subject: [PATCH 3/8] Added str::words_iter --- src/libcore/str.rs | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index ad77137829981..8c68f2f90a142 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -14,7 +14,7 @@ export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len, char_at, bytes, is_ascii, shift_byte, pop_byte, unsafe_from_byte, unsafe_from_bytes, from_char, char_range_at, from_cstr, sbuf, as_buf, push_byte, utf8_char_width, safe_slice, - contains, iter_chars, chars_iter, bytes_iter, + contains, iter_chars, chars_iter, bytes_iter, words_iter, loop_chars, loop_chars_sub, escape, any, all, map, windowed; #[abi = "cdecl"] @@ -914,6 +914,15 @@ fn words(ss: str) -> [str] { {|w| 0u < str::char_len(w)}); } +/* +Function: words_iter + +Apply a function to each word +*/ +fn words_iter(ss: str, ff: fn&(&&str)) { + vec::iter(words(ss), ff) +} + /* Function: concat @@ -1649,7 +1658,7 @@ mod tests { i += 1; } - iter_chars("") {|ch| fail; } // should not fail + iter_chars("") {|_ch| fail; } // should not fail } #[test] @@ -1683,6 +1692,29 @@ mod tests { bytes_iter("") {|bb| assert bb == 0u8; } } + #[test] + fn test_words_iter() { + let data = "\nMary had a little lamb\nLittle lamb\n"; + + let ii = 0; + + words_iter(data) {|ww| + alt ii { + 0 { assert "Mary" == ww; } + 1 { assert "had" == ww; } + 2 { assert "a" == ww; } + 3 { assert "little" == ww; } + _ { () } + } + ii += 1; + } + } + + #[test] + fn test_words_iter_() { + words_iter("") {|_ww| fail; } // should not fail + } + #[test] fn test_escape() { assert(escape("abcdef") == "abcdef"); From 58f7f6c61bc0428e63a9568f48394c5ce06562c1 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Mon, 23 Jan 2012 03:04:33 -0800 Subject: [PATCH 4/8] fixing indentation --- src/libcore/str.rs | 58 +++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 8c68f2f90a142..9093faa202672 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -363,7 +363,7 @@ Iterate over the characters in a string FIXME: A synonym to iter_chars */ fn chars_iter(ss: str, it: fn&(char)) { - iter_chars(ss, it) + iter_chars(ss, it) } /* @@ -374,13 +374,13 @@ Iterate over the bytes in a string FIXME: Should it really include the last byte '\0'? */ fn bytes_iter(ss: str, it: fn&(u8)) { - let pos = 0u; - let len = byte_len(ss); + let pos = 0u; + let len = byte_len(ss); - while (pos < len) { - it(ss[pos]); - pos += 1u; - } + while (pos < len) { + it(ss[pos]); + pos += 1u; + } } /* @@ -920,7 +920,7 @@ Function: words_iter Apply a function to each word */ fn words_iter(ss: str, ff: fn&(&&str)) { - vec::iter(words(ss), ff) + vec::iter(words(ss), ff) } /* @@ -1723,29 +1723,29 @@ mod tests { assert(escape("abc\"def") == "abc\\\"def"); } - #[test] - fn test_map() { - assert "" == map("", char::to_upper); - assert "YMCA" == map("ymca", char::to_upper); - } + #[test] + fn test_map() { + assert "" == map("", char::to_upper); + assert "YMCA" == map("ymca", char::to_upper); + } - #[test] - fn test_all() { - assert true == all("", char::is_uppercase); - assert false == all("ymca", char::is_uppercase); - assert true == all("YMCA", char::is_uppercase); - assert false == all("yMCA", char::is_uppercase); - assert false == all("YMCy", char::is_uppercase); - } + #[test] + fn test_all() { + assert true == all("", char::is_uppercase); + assert false == all("ymca", char::is_uppercase); + assert true == all("YMCA", char::is_uppercase); + assert false == all("yMCA", char::is_uppercase); + assert false == all("YMCy", char::is_uppercase); + } - #[test] - fn test_any() { - assert false == any("", char::is_uppercase); - assert false == any("ymca", char::is_uppercase); - assert true == any("YMCA", char::is_uppercase); - assert true == any("yMCA", char::is_uppercase); - assert true == any("YMCy", char::is_uppercase); - } + #[test] + fn test_any() { + assert false == any("", char::is_uppercase); + assert false == any("ymca", char::is_uppercase); + assert true == any("YMCA", char::is_uppercase); + assert true == any("yMCA", char::is_uppercase); + assert true == any("YMCy", char::is_uppercase); + } #[test] fn test_windowed() { From 715f4786e8e07f226753ce9721e0be8924dbea26 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Mon, 23 Jan 2012 03:20:01 -0800 Subject: [PATCH 5/8] Added str::lines_iter --- src/libcore/str.rs | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 9093faa202672..e18a200bacfc2 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -14,7 +14,7 @@ export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len, char_at, bytes, is_ascii, shift_byte, pop_byte, unsafe_from_byte, unsafe_from_bytes, from_char, char_range_at, from_cstr, sbuf, as_buf, push_byte, utf8_char_width, safe_slice, - contains, iter_chars, chars_iter, bytes_iter, words_iter, + contains, iter_chars, chars_iter, bytes_iter, words_iter, lines_iter, loop_chars, loop_chars_sub, escape, any, all, map, windowed; #[abi = "cdecl"] @@ -923,6 +923,15 @@ fn words_iter(ss: str, ff: fn&(&&str)) { vec::iter(words(ss), ff) } +/* +Function: lines_iter + +Apply a function to each lines (by '\n') +*/ +fn lines_iter(ss: str, ff: fn&(&&str)) { + vec::iter(lines(ss), ff) +} + /* Function: concat @@ -1708,11 +1717,26 @@ mod tests { } ii += 1; } + + words_iter("") {|_x| fail; } // should not fail } #[test] - fn test_words_iter_() { - words_iter("") {|_ww| fail; } // should not fail + fn test_lines_iter () { + let lf = "\nMary had a little lamb\nLittle lamb\n"; + + let ii = 0; + + lines_iter(lf) {|x| + alt ii { + 0 { assert "" == x; } + 1 { assert "Mary had a little lamb" == x; } + 2 { assert "Little lamb" == x; } + 3 { assert "" == x; } + _ { () } + } + ii += 1; + } } #[test] From c39e7e82c04954cc6683071ef93b2a33076f9d07 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Mon, 23 Jan 2012 03:58:48 -0800 Subject: [PATCH 6/8] improved test case for str::any --- src/libcore/str.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index e18a200bacfc2..2589216fd4b95 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1768,7 +1768,7 @@ mod tests { assert false == any("ymca", char::is_uppercase); assert true == any("YMCA", char::is_uppercase); assert true == any("yMCA", char::is_uppercase); - assert true == any("YMCy", char::is_uppercase); + assert true == any("Ymcy", char::is_uppercase); } #[test] From e85ae58ae33a7d8224ac2bed0373826cb7ec4e9c Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Mon, 23 Jan 2012 20:59:18 -0800 Subject: [PATCH 7/8] Added str::from_bytes (which is UTF-8 safe) --- src/libcore/str.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 2589216fd4b95..8c2038245a5c1 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -13,6 +13,7 @@ export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len, push_char, is_utf8, from_chars, to_chars, char_len, char_len_range, char_at, bytes, is_ascii, shift_byte, pop_byte, unsafe_from_byte, unsafe_from_bytes, from_char, char_range_at, + from_bytes, from_cstr, sbuf, as_buf, push_byte, utf8_char_width, safe_slice, contains, iter_chars, chars_iter, bytes_iter, words_iter, lines_iter, loop_chars, loop_chars_sub, escape, any, all, map, windowed; @@ -191,6 +192,19 @@ fn unsafe_from_bytes(v: [const u8]) -> str unsafe { ret scopy; } +/* +Function: from_bytes + +Safely convert a vector of bytes to a UTF-8 string, or error +*/ +fn from_bytes(vv: [u8]) -> result::t { + if is_utf8(vv) { + ret result::ok(unsafe_from_bytes(vv)); + } else { + ret result::err("vector doesn't contain valid UTF-8"); + } +} + /* Function: unsafe_from_byte @@ -1594,6 +1608,23 @@ mod tests { assert (b == "AAAAAAA"); } + #[test] + fn test_from_bytes() { + let ss = "ศไทย中华Việt Nam"; + let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8, + 0xe0_u8, 0xb9_u8, 0x84_u8, + 0xe0_u8, 0xb8_u8, 0x97_u8, + 0xe0_u8, 0xb8_u8, 0xa2_u8, + 0xe4_u8, 0xb8_u8, 0xad_u8, + 0xe5_u8, 0x8d_u8, 0x8e_u8, + 0x56_u8, 0x69_u8, 0xe1_u8, + 0xbb_u8, 0x87_u8, 0x74_u8, + 0x20_u8, 0x4e_u8, 0x61_u8, + 0x6d_u8]; + + assert ss == result::get(from_bytes(bb)); + } + #[test] fn test_from_cstr() unsafe { let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8]; From b358bc2dd977ddfc6fc6781a5743601b664de892 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Mon, 23 Jan 2012 21:38:09 -0800 Subject: [PATCH 8/8] s/fn&/fn/ --- src/libcore/str.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 8c2038245a5c1..d4b8ecc4f53f5 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -376,7 +376,7 @@ Iterate over the characters in a string FIXME: A synonym to iter_chars */ -fn chars_iter(ss: str, it: fn&(char)) { +fn chars_iter(ss: str, it: fn(char)) { iter_chars(ss, it) } @@ -387,7 +387,7 @@ Iterate over the bytes in a string FIXME: Should it really include the last byte '\0'? */ -fn bytes_iter(ss: str, it: fn&(u8)) { +fn bytes_iter(ss: str, it: fn(u8)) { let pos = 0u; let len = byte_len(ss); @@ -866,7 +866,7 @@ Splits a string into substrings using a function FIXME: will be renamed to split. */ -fn split_func(ss: str, sepfn: fn&(cc: char)->bool) -> [str] { +fn split_func(ss: str, sepfn: fn(cc: char)->bool) -> [str] { let vv: [str] = []; let accum: str = ""; let ends_with_sep: bool = false; @@ -933,7 +933,7 @@ Function: words_iter Apply a function to each word */ -fn words_iter(ss: str, ff: fn&(&&str)) { +fn words_iter(ss: str, ff: fn(&&str)) { vec::iter(words(ss), ff) } @@ -942,7 +942,7 @@ Function: lines_iter Apply a function to each lines (by '\n') */ -fn lines_iter(ss: str, ff: fn&(&&str)) { +fn lines_iter(ss: str, ff: fn(&&str)) { vec::iter(lines(ss), ff) } @@ -1180,7 +1180,7 @@ if the string contains no characters // FIXME: a synonym to loop_chars */ -fn all(ss: str, ff: fn&(char) -> bool) -> bool { +fn all(ss: str, ff: fn(char) -> bool) -> bool { str::loop_chars(ss, ff) } @@ -1190,7 +1190,7 @@ Function: any Return true if a predicate matches any character (and false if it matches none or there are no characters) */ -fn any(ss: str, pred: fn&(char) -> bool) -> bool { +fn any(ss: str, pred: fn(char) -> bool) -> bool { !all(ss, {|cc| !pred(cc)}) } @@ -1199,7 +1199,7 @@ Function: map Apply a function to each character */ -fn map(ss: str, ff: fn&(char) -> char) -> str { +fn map(ss: str, ff: fn(char) -> char) -> str { let result = ""; str::iter_chars(ss, {|cc|