Skip to content

Adding various string and vector functions #1617

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
257 changes: 252 additions & 5 deletions src/libcore/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len,
push_char, is_utf8, from_chars, to_chars, char_len, char_len_range,
char_at, bytes, is_ascii, shift_byte, pop_byte,
unsafe_from_byte, unsafe_from_bytes, from_char, char_range_at,
from_bytes,
from_cstr, sbuf, as_buf, push_byte, utf8_char_width, safe_slice,
contains, iter_chars, loop_chars, loop_chars_sub,
escape;
contains, iter_chars, chars_iter, bytes_iter, words_iter, lines_iter,
loop_chars, loop_chars_sub, escape, any, all, map, windowed;

#[abi = "cdecl"]
native mod rustrt {
Expand Down Expand Up @@ -191,6 +192,19 @@ fn unsafe_from_bytes(v: [const u8]) -> str unsafe {
ret scopy;
}

/*
Function: from_bytes

Convert a vector of bytes to a string, checking first that the bytes
form valid UTF-8

Returns `result::ok` holding the string when `is_utf8(vv)` is true, and
`result::err` holding an explanatory message otherwise.
*/
fn from_bytes(vv: [u8]) -> result::t<str, str> {
    // Guard clause: bail out before touching the unchecked conversion.
    if !is_utf8(vv) {
        ret result::err("vector doesn't contain valid UTF-8");
    }

    ret result::ok(unsafe_from_bytes(vv));
}

/*
Function: unsafe_from_byte

Expand Down Expand Up @@ -346,7 +360,6 @@ Function: iter_chars

Iterate over the characters in a string
*/

fn iter_chars(s: str, it: block(char)) {
let pos = 0u, len = byte_len(s);
while (pos < len) {
Expand All @@ -356,6 +369,34 @@ fn iter_chars(s: str, it: block(char)) {
}
}

/*
Function: chars_iter

Iterate over the characters in a string, calling `it` on each one

FIXME: A synonym for iter_chars
*/
fn chars_iter(ss: str, it: fn(char)) {
// Pure delegation: iter_chars performs the actual traversal.
iter_chars(ss, it)
}

/*
Function: bytes_iter

Iterate over the bytes in a string

FIXME: Should it really include the last byte '\0'?
*/
fn bytes_iter(ss: str, it: fn(u8)) {
let pos = 0u;
let len = byte_len(ss);

while (pos < len) {
it(ss[pos]);
pos += 1u;
}
}

/*
Function: loop_chars

Expand Down Expand Up @@ -825,7 +866,7 @@ Splits a string into substrings using a function

FIXME: will be renamed to split.
*/
fn split_func(ss: str, sepfn: fn&(cc: char)->bool) -> [str] {
fn split_func(ss: str, sepfn: fn(cc: char)->bool) -> [str] {
let vv: [str] = [];
let accum: str = "";
let ends_with_sep: bool = false;
Expand Down Expand Up @@ -887,6 +928,24 @@ fn words(ss: str) -> [str] {
{|w| 0u < str::char_len(w)});
}

/*
Function: words_iter

Apply a function to each word of a string

The words are obtained from `words(ss)` and handed to `ff` through
`vec::iter`.
*/
fn words_iter(ss: str, ff: fn(&&str)) {
vec::iter(words(ss), ff)
}

/*
Function: lines_iter

Apply a function to each line of a string (split by '\n')

The lines are obtained from `lines(ss)` and handed to `ff` through
`vec::iter`.
*/
fn lines_iter(ss: str, ff: fn(&&str)) {
vec::iter(lines(ss), ff)
}

/*
Function: concat

Expand Down Expand Up @@ -1113,6 +1172,64 @@ fn escape(s: str) -> str {
r
}

/*
Function: all

Return true if a predicate matches all characters or
if the string contains no characters

FIXME: a synonym for loop_chars
*/
fn all(ss: str, ff: fn(char) -> bool) -> bool {
// Pure delegation: the result is whatever loop_chars reports.
str::loop_chars(ss, ff)
}

/*
Function: any

Return true if a predicate matches any character
(and false if it matches none or there are no characters)
*/
fn any(ss: str, pred: fn(char) -> bool) -> bool {
// De Morgan: "some character matches" is the negation of
// "every character fails to match".
!all(ss, {|cc| !pred(cc)})
}

/*
Function: map

Apply a function to each character

Returns a new string built by appending `ff(cc)` for every character `cc`
that `iter_chars` yields from `ss`.
*/
fn map(ss: str, ff: fn(char) -> char) -> str {
// Accumulator for the transformed characters; stays "" when the closure
// below is never invoked.
let result = "";

str::iter_chars(ss, {|cc|
str::push_char(result, ff(cc));
});

ret result;
}

/*
Function: windowed

Create a vector of all substrings that are `nn` characters long

The windows start at character offsets 0, 1, 2, ... and each one is taken
with `char_slice`, so sizes and offsets are counted in characters, not
bytes. The result is empty when `nn` exceeds the character length of `ss`.

Fails when `nn` is zero.
*/
fn windowed(nn: uint, ss: str) -> [str] {
    let windows = [];
    let n_chars = str::char_len(ss);

    assert 1u <= nn;

    // `start` is the character offset at which the current window begins.
    let start = 0u;
    while start + nn <= n_chars {
        vec::push(windows, char_slice(ss, start, start + nn));
        start += 1u;
    }

    ret windows;
}

#[cfg(test)]
mod tests {

Expand Down Expand Up @@ -1491,6 +1608,23 @@ mod tests {
assert (b == "AAAAAAA");
}

// Checks that from_bytes accepts a valid multi-byte UTF-8 sequence and
// yields the matching string.
#[test]
fn test_from_bytes() {
let ss = "ศไทย中华Việt Nam";
// The UTF-8 encoding of `ss`, written out byte by byte.
let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
0xe0_u8, 0xb9_u8, 0x84_u8,
0xe0_u8, 0xb8_u8, 0x97_u8,
0xe0_u8, 0xb8_u8, 0xa2_u8,
0xe4_u8, 0xb8_u8, 0xad_u8,
0xe5_u8, 0x8d_u8, 0x8e_u8,
0x56_u8, 0x69_u8, 0xe1_u8,
0xbb_u8, 0x87_u8, 0x74_u8,
0x20_u8, 0x4e_u8, 0x61_u8,
0x6d_u8];

assert ss == result::get(from_bytes(bb));
}

#[test]
fn test_from_cstr() unsafe {
let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
Expand Down Expand Up @@ -1563,6 +1697,77 @@ mod tests {
}
i += 1;
}

iter_chars("") {|_ch| fail; } // should not fail
}

// Checks that chars_iter delivers every character of a string, in order,
// and never invokes the closure for an empty string.
#[test]
fn test_chars_iter() {
    // Number of characters delivered so far; selects the expected value.
    let i = 0;
    chars_iter("x\u03c0y") {|ch|
        alt i {
          0 { assert ch == 'x'; }
          1 { assert ch == '\u03c0'; }
          2 { assert ch == 'y'; }
        }
        i += 1;
    }

    // Without this check the test would pass vacuously if the closure were
    // never called, or called for only some of the characters.
    assert i == 3;

    chars_iter("") {|_ch| fail; } // should not fail
}

// Checks that bytes_iter delivers the bytes of an ASCII string in order.
#[test]
fn test_bytes_iter() {
    // Number of bytes delivered so far; selects the expected value.
    let i = 0;

    bytes_iter("xyz") {|bb|
        alt i {
          0 { assert bb == 'x' as u8; }
          1 { assert bb == 'y' as u8; }
          2 { assert bb == 'z' as u8; }
        }
        i += 1;
    }

    // Without this check the test would pass vacuously if the closure were
    // never called, or called for only some of the bytes.
    assert i == 3;

    // NOTE(review): this only constrains the value of any byte that is
    // delivered for ""; it does not check whether one is delivered at all.
    bytes_iter("") {|bb| assert bb == 0u8; }
}

// Checks the first four words that words_iter delivers, and that an empty
// string never invokes the closure.
#[test]
fn test_words_iter() {
let data = "\nMary had a little lamb\nLittle lamb\n";

// Index of the word currently being delivered.
let ii = 0;

words_iter(data) {|ww|
alt ii {
0 { assert "Mary" == ww; }
1 { assert "had" == ww; }
2 { assert "a" == ww; }
3 { assert "little" == ww; }
// Words after the fourth are accepted without inspection.
_ { () }
}
ii += 1;
}

words_iter("") {|_x| fail; } // should not fail
}

// Checks the lines that lines_iter delivers for a string that both starts
// and ends with '\n'.
// NOTE(review): the total number of delivered lines is not asserted, so an
// arm below that is never reached goes unnoticed.
#[test]
fn test_lines_iter () {
let lf = "\nMary had a little lamb\nLittle lamb\n";

// Index of the line currently being delivered.
let ii = 0;

lines_iter(lf) {|x|
alt ii {
0 { assert "" == x; }
1 { assert "Mary had a little lamb" == x; }
2 { assert "Little lamb" == x; }
3 { assert "" == x; }
// Any further lines are accepted without inspection.
_ { () }
}
ii += 1;
}
}

#[test]
Expand All @@ -1572,4 +1777,46 @@ mod tests {
assert(escape("abc\ndef") == "abc\\ndef");
assert(escape("abc\"def") == "abc\\\"def");
}
}

// Checks map with char::to_upper on an empty and a non-empty string.
#[test]
fn test_map() {
assert "" == map("", char::to_upper);
assert "YMCA" == map("ymca", char::to_upper);
}

// Checks all with char::is_uppercase: true for the empty string and for an
// all-uppercase string, false as soon as one character is lowercase
// (whether first, last, or every character).
#[test]
fn test_all() {
assert true == all("", char::is_uppercase);
assert false == all("ymca", char::is_uppercase);
assert true == all("YMCA", char::is_uppercase);
assert false == all("yMCA", char::is_uppercase);
assert false == all("YMCy", char::is_uppercase);
}

// Checks any with char::is_uppercase: false for the empty string and for an
// all-lowercase string, true as soon as one character is uppercase.
#[test]
fn test_any() {
assert false == any("", char::is_uppercase);
assert false == any("ymca", char::is_uppercase);
assert true == any("YMCA", char::is_uppercase);
assert true == any("yMCA", char::is_uppercase);
assert true == any("Ymcy", char::is_uppercase);
}

// Checks windowed on a string of multi-byte characters, so that window
// sizes are seen to be counted in characters rather than bytes.
#[test]
fn test_windowed() {
// Ten characters long.
let data = "ประเทศไทย中";

assert ["ประ", "ระเ", "ะเท", "เทศ", "ทศไ", "ศไท", "ไทย", "ทย中"]
== windowed(3u, data);

// A window as long as the string yields the string itself, once.
assert [data] == windowed(10u, data);

// A window longer than the string yields nothing.
assert [] == windowed(6u, "abcd");
}

// A window size of zero is expected to make windowed fail.
#[test]
#[should_fail]
fn test_windowed_() {
let _x = windowed(0u, "abcd");
}
}
33 changes: 33 additions & 0 deletions src/libcore/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,23 @@ fn permute<T: copy>(v: [const T], put: block([T])) {
}
}

/*
Function: windowed

Create a vector of all contiguous sub-vectors of `xx` that are `nn`
elements long

The windows start at offsets 0, 1, 2, ... and each one is taken with
`vec::slice`. The result is empty when `nn` exceeds the length of `xx`.

Fails when `nn` is zero.
*/
fn windowed <TT: copy> (nn: uint, xx: [TT]) -> [[TT]] {
    let ww = [];

    assert 1u <= nn;

    // Measured once: the length does not change while windows are collected.
    let len = vec::len(xx);

    // Bounding the start offset by `len - nn` (instead of testing
    // `ii + nn <= len` for every element) means the sum below can never
    // exceed `len`, so a very large `nn` cannot wrap around and slip past
    // the bound check.
    if nn <= len {
        let last = len - nn;
        let ii = 0u;
        while ii <= last {
            vec::push(ww, vec::slice(xx, ii, ii + nn));
            ii += 1u;
        }
    }

    ret ww;
}

/*
Function: to_ptr

Expand Down Expand Up @@ -1497,6 +1514,22 @@ mod tests {
assert concat([[1], [2,3]]) == [1, 2, 3];
}

// Checks windowed on a six-element vector for window sizes that fit
// (3 and 4) and one that does not (7).
#[test]
fn test_windowed () {
assert [[1u,2u,3u],[2u,3u,4u],[3u,4u,5u],[4u,5u,6u]]
== windowed (3u, [1u,2u,3u,4u,5u,6u]);

assert [[1u,2u,3u,4u],[2u,3u,4u,5u],[3u,4u,5u,6u]]
== windowed (4u, [1u,2u,3u,4u,5u,6u]);

// A window longer than the vector yields nothing.
assert [] == windowed (7u, [1u,2u,3u,4u,5u,6u]);
}

// A window size of zero is expected to make windowed fail.
#[test]
#[should_fail]
fn test_windowed_() {
let _x = windowed (0u, [1u,2u,3u,4u,5u,6u]);
}
}

// Local Variables:
Expand Down