From c7ba2281f8da448dd4c1dbb2f8604232a82c031b Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Tue, 24 Jan 2012 01:29:45 -0800
Subject: [PATCH 1/2] Reorganizing str.rs to group and document strings better
 (no functional changes, though FIXMEs added)

---
 src/libcore/str.rs | 1566 ++++++++++++++++++++++++--------------------
 1 file changed, 863 insertions(+), 703 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index fa1aca29efa4c..2d4d1a7419d64 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -1,182 +1,130 @@
 /*
 Module: str
 
-String manipulation.
-*/
-
-export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len,
-       byte_len_range, index,
-       rindex, find, starts_with, ends_with, substr, slice, split, splitn,
-       split_str, split_func, split_char, lines, lines_any, words,
-       concat, connect, to_lower, to_upper, replace, char_slice,
-       trim_left, trim_right, trim, unshift_char, shift_char, pop_char,
-       push_char, is_utf8, from_chars, to_chars, char_len, char_len_range,
-       char_at, bytes, is_ascii, shift_byte, pop_byte,
-       unsafe_from_byte, unsafe_from_bytes, from_char, char_range_at,
-       from_bytes,
-       from_cstr, sbuf, as_buf, push_byte, utf8_char_width, safe_slice,
-       contains, iter_chars, chars_iter, bytes_iter, words_iter, lines_iter,
-       loop_chars, loop_chars_sub, escape, any, all, map, windowed;
+String manipulation
+
+Strings are a packed UTF-8 representation of text, stored as null-terminated
+buffers of u8 bytes.  For correctness, strings should be processed character
+by character, but some functions that are not UTF-8 safe are also provided.
+For some heavy-duty uses, we recommend trying std::rope.
+*/
+
+export
+   // Creating a string
+   from_bytes,
+   unsafe_from_bytes,
+   unsafe_from_byte,
+   //push_utf8_bytes,
+   from_char,
+   from_chars,
+   from_cstr,
+   concat,
+   connect,
+
+   // Adding things to and removing things from a string
+   push_char,
+   pop_char,
+   shift_char,
+   unshift_char,
+   push_byte,
+   //push_bytes,
+   pop_byte,
+   shift_byte,
+   trim_left,
+   trim_right,
+   trim,
+
+   // Transforming strings
+   bytes,
+   to_chars,
+   substr,
+   char_slice,
+   slice,
+   safe_slice,
+   split,
+   splitn,
+   split_str,
+   split_func,
+   split_char,
+   lines,
+   lines_any,
+   words,
+   windowed,
+   to_lower,
+   to_upper,
+   replace,
+   escape,
+
+   // Comparing strings
+   eq,
+   lteq,
+   hash,
+
+   // Iterating through strings
+   loop_chars,
+   all,
+   any,
+   map,
+   bytes_iter,
+   iter_chars,
+   chars_iter,
+   words_iter,
+   lines_iter,
+
+   // Searching
+   index,
+   rindex,
+   find,
+   contains,
+   starts_with,
+   ends_with,
+
+   // String properties
+   is_ascii,
+   is_empty,
+   is_not_empty,
+   is_whitespace,
+   byte_len,
+   char_len,
+
+   // Misc
+   // FIXME: perhaps some more of this section shouldn't be exported?
+   is_utf8,
+   char_len_range,
+   byte_len_range,
+   utf8_char_width,
+   char_range_at,
+   char_at,
+   loop_chars_sub,
+   escape_char,
+   as_buf,
+   //buf,
+   sbuf;
+
+
 
 #[abi = "cdecl"]
 native mod rustrt {
     fn rust_str_push(&s: str, ch: u8);
 }
 
-/*
-Function: eq
-
-Bytewise string equality
-*/
-pure fn eq(&&a: str, &&b: str) -> bool { a == b }
-
-/*
-Function: lteq
-
-Bytewise less than or equal
-*/
-pure fn lteq(&&a: str, &&b: str) -> bool { a <= b }
-
-/*
-Function: hash
-
-String hash function
-*/
-fn hash(&&s: str) -> uint {
-    // djb hash.
-    // FIXME: replace with murmur.
-
-    let u: uint = 5381u;
-    for c: u8 in s { u *= 33u; u += c as uint; }
-    ret u;
-}
-
-// UTF-8 tags and ranges
-const tag_cont_u8: u8 = 128u8;
-const tag_cont: uint = 128u;
-const max_one_b: uint = 128u;
-const tag_two_b: uint = 192u;
-const max_two_b: uint = 2048u;
-const tag_three_b: uint = 224u;
-const max_three_b: uint = 65536u;
-const tag_four_b: uint = 240u;
-const max_four_b: uint = 2097152u;
-const tag_five_b: uint = 248u;
-const max_five_b: uint = 67108864u;
-const tag_six_b: uint = 252u;
-
-/*
-Function: is_utf8
-
-Determines if a vector uf bytes contains valid UTF-8
-*/
-fn is_utf8(v: [u8]) -> bool {
-    let i = 0u;
-    let total = vec::len::<u8>(v);
-    while i < total {
-        let chsize = utf8_char_width(v[i]);
-        if chsize == 0u { ret false; }
-        if i + chsize > total { ret false; }
-        i += 1u;
-        while chsize > 1u {
-            if v[i] & 192u8 != tag_cont_u8 { ret false; }
-            i += 1u;
-            chsize -= 1u;
-        }
-    }
-    ret true;
-}
-
-/*
-Function: is_ascii
-
-Determines if a string contains only ASCII characters
-*/
-fn is_ascii(s: str) -> bool {
-    let i: uint = byte_len(s);
-    while i > 0u { i -= 1u; if s[i] & 128u8 != 0u8 { ret false; } }
-    ret true;
-}
-
-/*
-Predicate: is_empty
-
-Returns true if the string has length 0
-*/
-pure fn is_empty(s: str) -> bool { for c: u8 in s { ret false; } ret true; }
-
-/*
-Predicate: is_not_empty
-
-Returns true if the string has length greater than 0
-*/
-pure fn is_not_empty(s: str) -> bool { !is_empty(s) }
-
-/*
-Function: is_whitespace
-
-Returns true if the string contains only whitespace
-*/
-fn is_whitespace(s: str) -> bool {
-    ret loop_chars(s, char::is_whitespace);
-}
-
-/*
-Function: byte_len
-
-Returns the length in bytes of a string
-*/
-pure fn byte_len(s: str) -> uint unsafe {
-    let v: [u8] = unsafe::reinterpret_cast(s);
-    let vlen = vec::len(v);
-    unsafe::leak(v);
-    // There should always be a null terminator
-    assert (vlen > 0u);
-    ret vlen - 1u;
-}
+// FIXME: add pure to a lot of functions
 
 /*
-Function: byte_len_range
-
-As byte_len but for a substring
-
-Parameters:
-s - A string
-byte_offset - The byte offset at which to start in the string
-char_len    - The number of chars (not bytes!) in the range
-
-Returns:
-The number of bytes in the substring starting at `byte_offset` and
-containing `char_len` chars.
-
-Safety note:
-
-This function fails if `byte_offset` or `char_len` do not represent
-valid positions in `s`
+Section: Creating a string
 */
-fn byte_len_range(s: str, byte_offset: uint, char_len: uint) -> uint {
-    let i = byte_offset;
-    let chars = 0u;
-    while chars < char_len {
-        let chsize = utf8_char_width(s[i]);
-        assert (chsize > 0u);
-        i += chsize;
-        chars += 1u;
-    }
-    ret i - byte_offset;
-}
 
 /*
-Function: bytes
+Function: from_bytes
 
-Converts a string to a vector of bytes. The result vector is not
-null-terminated.
+Safely convert a vector of bytes to a UTF-8 string, or error
 */
-fn bytes(s: str) -> [u8] unsafe {
-    let v = unsafe::reinterpret_cast(s);
-    let vcopy = vec::slice(v, 0u, vec::len(v) - 1u);
-    unsafe::leak(v);
-    ret vcopy;
+fn from_bytes(vv: [u8]) -> result::t<str, str> {
+   if is_utf8(vv) {
+      ret result::ok(unsafe_from_bytes(vv));
+   } else {
+      ret result::err("vector doesn't contain valid UTF-8");
+   }
 }
 
 /*
@@ -184,6 +132,8 @@ Function: unsafe_from_bytes
 
 Converts a vector of bytes to a string. Does not verify that the
 vector contains valid UTF-8.
+
+FIXME: remove?
 */
 fn unsafe_from_bytes(v: [const u8]) -> str unsafe {
     let vcopy: [u8] = v + [0u8];
@@ -192,24 +142,13 @@ fn unsafe_from_bytes(v: [const u8]) -> str unsafe {
     ret scopy;
 }
 
-/*
-Function: from_bytes
-
-Safely convert a vector of bytes to a UTF-8 string, or error
-*/
-fn from_bytes(vv: [u8]) -> result::t<str, str> {
-   if is_utf8(vv) {
-      ret result::ok(unsafe_from_bytes(vv));
-   } else {
-      ret result::err("vector doesn't contain valid UTF-8");
-   }
-}
-
 /*
 Function: unsafe_from_byte
 
 Converts a byte to a string. Does not verify that the byte is
 valid UTF-8.
+
+FIXME: rename to 'from_byte'
 */
 fn unsafe_from_byte(u: u8) -> str { unsafe_from_bytes([u]) }
 
@@ -265,237 +204,219 @@ fn from_chars(chs: [char]) -> str {
 }
 
 /*
-Function: utf8_char_width
+Function: from_cstr
 
-Given a first byte, determine how many bytes are in this UTF-8 character
+Create a Rust string from a null-terminated C string
 */
-pure fn utf8_char_width(b: u8) -> uint {
-    let byte: uint = b as uint;
-    if byte < 128u { ret 1u; }
-    if byte < 192u {
-        ret 0u; // Not a valid start byte
-
+unsafe fn from_cstr(cstr: sbuf) -> str {
+    let res = "";
+    let start = cstr;
+    let curr = start;
+    let i = 0u;
+    while *curr != 0u8 {
+        push_byte(res, *curr);
+        i += 1u;
+        curr = ptr::offset(start, i);
     }
-    if byte < 224u { ret 2u; }
-    if byte < 240u { ret 3u; }
-    if byte < 248u { ret 4u; }
-    if byte < 252u { ret 5u; }
-    ret 6u;
+    ret res;
 }
 
 /*
-Function: char_range_at
+Function: concat
 
-Pluck a character out of a string and return the index of the next character.
-This function can be used to iterate over the unicode characters of a string.
+Concatenate a vector of strings
+*/
+fn concat(v: [str]) -> str {
+    let s: str = "";
+    for ss: str in v { s += ss; }
+    ret s;
+}
 
-Example:
-> let s = "中华Việt Nam";
-> let i = 0u;
-> while i < str::byte_len(s) {
->    let {ch, next} = str::char_range_at(s, i);
->    std::io::println(#fmt("%u: %c",i,ch));
->    i = next;
-> }
+/*
+Function: connect
 
-Example output:
+Concatenate a vector of strings, placing a given separator between each
+*/
+fn connect(v: [str], sep: str) -> str {
+    let s: str = "";
+    let first: bool = true;
+    for ss: str in v {
+        if first { first = false; } else { s += sep; }
+        s += ss;
+    }
+    ret s;
+}
 
-      0: 中
-      3: 华
-      6: V
-      7: i
-      8: ệ
-      11: t
-      12:
-      13: N
-      14: a
-      15: m
+/*
+Section: Adding to and removing from a string
+*/
 
-Parameters:
+/*
+Function: push_char
 
-s - The string
-i - The byte offset of the char to extract
+Append a character to a string
+*/
+fn push_char(&s: str, ch: char) { s += from_char(ch); }
 
-Returns:
+/*
+Function: pop_char
 
-A record {ch: char, next: uint} containing the char value and the byte
-index of the next unicode character.
+Remove the final character from a string and return it.
 
 Failure:
 
-If `i` is greater than or equal to the length of the string.
-If `i` is not the index of the beginning of a valid UTF-8 character.
+If the string does not contain any characters.
 */
-fn char_range_at(s: str, i: uint) -> {ch: char, next: uint} {
-    let b0 = s[i];
-    let w = utf8_char_width(b0);
-    assert (w != 0u);
-    if w == 1u { ret {ch: b0 as char, next: i + 1u}; }
-    let val = 0u;
-    let end = i + w;
-    let i = i + 1u;
-    while i < end {
-        let byte = s[i];
-        assert (byte & 192u8 == tag_cont_u8);
-        val <<= 6u;
-        val += byte & 63u8 as uint;
-        i += 1u;
-    }
-    // Clunky way to get the right bits from the first byte. Uses two shifts,
-    // the first to clip off the marker bits at the left of the byte, and then
-    // a second (as uint) to get it to the right position.
-    val += (b0 << (w + 1u as u8) as uint) << ((w - 1u) * 6u - w - 1u);
-    ret {ch: val as char, next: i};
+fn pop_char(&s: str) -> char {
+    let end = byte_len(s);
+    while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; }
+    assert (end > 0u);
+    let ch = char_at(s, end - 1u);
+    s = substr(s, 0u, end - 1u);
+    ret ch;
 }
 
 /*
-Function: char_at
+Function: shift_char
 
-Pluck a character out of a string
+Remove the first character from a string and return it.
+
+Failure:
+
+If the string does not contain any characters.
 */
-fn char_at(s: str, i: uint) -> char { ret char_range_at(s, i).ch; }
+fn shift_char(&s: str) -> char {
+    let r = char_range_at(s, 0u);
+    s = substr(s, r.next, byte_len(s) - r.next);
+    ret r.ch;
+}
 
 /*
-Function: iter_chars
+Function: unshift_char
 
-Iterate over the characters in a string
+Prepend a char to a string
 */
-fn iter_chars(s: str, it: fn(char)) {
-    let pos = 0u, len = byte_len(s);
-    while (pos < len) {
-        let {ch, next} = char_range_at(s, pos);
-        pos = next;
-        it(ch);
-    }
-}
+fn unshift_char(&s: str, ch: char) { s = from_char(ch) + s; }
 
 /*
-Function: chars_iter
+Function: push_byte
 
-Iterate over the characters in a string
+Appends a byte to a string.
 
-FIXME: A synonym to iter_chars
+This function is not unicode-safe.
 */
-fn chars_iter(ss: str, it: fn(char)) {
-    iter_chars(ss, it)
-}
+fn push_byte(&s: str, b: u8) { rustrt::rust_str_push(s, b); }
 
 /*
-Function: bytes_iter
+Function: push_bytes
 
-Iterate over the bytes in a string
+Appends a vector of bytes to a string.
 
-FIXME: Should it really include the last byte '\0'?
+This function is not unicode-safe.
 */
-fn bytes_iter(ss: str, it: fn(u8)) {
-    let pos = 0u;
-    let len = byte_len(ss);
-
-    while (pos < len) {
-        it(ss[pos]);
-        pos += 1u;
-    }
+fn push_bytes(&s: str, bytes: [u8]) {
+    for byte in bytes { rustrt::rust_str_push(s, byte); }
 }
 
 /*
-Function: loop_chars
-
-Loop through a string, char by char
-
-Parameters:
-s  - A string to traverse. It may be empty.
-it - A block to execute with each consecutive character of `s`.
-Return `true` to continue, `false` to stop.
+Function: pop_byte
 
-Returns:
+Removes the last byte from a string and returns it.
 
-`true` If execution proceeded correctly, `false` if it was interrupted,
-that is if `it` returned `false` at any point.
- */
-fn loop_chars(s: str, it: fn(char) -> bool) -> bool{
-    ret loop_chars_sub(s, 0u, byte_len(s), it);
+This function is not unicode-safe.
+*/
+fn pop_byte(&s: str) -> u8 {
+    let len = byte_len(s);
+    assert (len > 0u);
+    let b = s[len - 1u];
+    s = substr(s, 0u, len - 1u);
+    ret b;
 }
 
 /*
-Function: loop_chars_sub
-
-Loop through a substring, char by char
+Function: shift_byte
 
-Parameters:
-s           - A string to traverse. It may be empty.
-byte_offset - The byte offset at which to start in the string.
-byte_len    - The number of bytes to traverse in the string
-it          - A block to execute with each consecutive character of `s`.
-Return `true` to continue, `false` to stop.
+Removes the first byte from a string and returns it.
 
-Returns:
+This function is not unicode-safe.
+*/
+fn shift_byte(&s: str) -> u8 {
+    let len = byte_len(s);
+    assert (len > 0u);
+    let b = s[0];
+    s = substr(s, 1u, len - 1u);
+    ret b;
+}
 
-`true` If execution proceeded correctly, `false` if it was interrupted,
-that is if `it` returned `false` at any point.
+/*
+Function: trim_left
 
-Safety note:
-- This function does not check whether the substring is valid.
-- This function fails if `byte_offset` or `byte_len` do not
- represent valid positions inside `s`
- */
-fn loop_chars_sub(s: str, byte_offset: uint, byte_len: uint,
-              it: fn(char) -> bool) -> bool {
-   let i = byte_offset;
-   let result = true;
-   while i < byte_len {
-      let {ch, next} = char_range_at(s, i);
-      if !it(ch) {result = false; break;}
-      i = next;
-   }
-   ret result;
+Returns a string with leading whitespace removed.
+*/
+fn trim_left(s: str) -> str {
+    fn count_whities(s: [char]) -> uint {
+        let i = 0u;
+        while i < vec::len(s) {
+            if !char::is_whitespace(s[i]) { break; }
+            i += 1u;
+        }
+        ret i;
+    }
+    let chars = to_chars(s);
+    let whities = count_whities(chars);
+    ret from_chars(vec::slice(chars, whities, vec::len(chars)));
 }
 
-
 /*
-Function: char_len
+Function: trim_right
 
-Count the number of unicode characters in a string
+Returns a string with trailing whitespace removed.
 */
-fn char_len(s: str) -> uint {
-    ret char_len_range(s, 0u, byte_len(s));
+fn trim_right(s: str) -> str {
+    fn count_whities(s: [char]) -> uint {
+        let i = vec::len(s);
+        while 0u < i {
+            if !char::is_whitespace(s[i - 1u]) { break; }
+            i -= 1u;
+        }
+        ret i;
+    }
+    let chars = to_chars(s);
+    let whities = count_whities(chars);
+    ret from_chars(vec::slice(chars, 0u, whities));
 }
 
 /*
-Function: char_len_range
+Function: trim
 
-As char_len but for a slice of a string
+Returns a string with leading and trailing whitespace removed
+*/
+fn trim(s: str) -> str { trim_left(trim_right(s)) }
 
-Parameters:
- s           - A valid string
- byte_start  - The position inside `s` where to start counting in bytes.
- byte_len    - The number of bytes of `s` to take into account.
 
-Returns:
- The number of Unicode characters in `s` in
-segment [byte_start, byte_start+len( .
+/*
+Section: Transforming strings
+*/
 
-Safety note:
-- This function does not check whether the substring is valid.
-- This function fails if `byte_offset` or `byte_len` do not
- represent valid positions inside `s`
+/*
+Function: bytes
+
+Converts a string to a vector of bytes. The result vector is not
+null-terminated.
 */
-fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint {
-    let i     = byte_start;
-    let len   = 0u;
-    while i < byte_len {
-        let chsize = utf8_char_width(s[i]);
-        assert (chsize > 0u);
-        len += 1u;
-        i += chsize;
-    }
-    assert (i == byte_len);
-    ret len;
+fn bytes(s: str) -> [u8] unsafe {
+    let v = unsafe::reinterpret_cast(s);
+    let vcopy = vec::slice(v, 0u, vec::len(v) - 1u);
+    unsafe::leak(v);
+    ret vcopy;
 }
 
 /*
 Function: to_chars
 
 Convert a string to a vector of characters
+
+FIXME: rename to 'chars'
 */
 fn to_chars(s: str) -> [char] {
     let buf: [char] = [];
@@ -510,175 +431,37 @@ fn to_chars(s: str) -> [char] {
 }
 
 /*
-Function: push_char
-
-Append a character to a string
-*/
-fn push_char(&s: str, ch: char) { s += from_char(ch); }
+Function: substr
 
-/*
-Function: pop_char
+Take a substring of another. Returns a string containing `len` bytes
+starting at byte offset `begin`.
 
-Remove the final character from a string and return it.
+FIXME: This function is not unicode-safe.
 
 Failure:
 
-If the string does not contain any characters.
+If `begin` + `len` is greater than the byte length of the string
 */
-fn pop_char(&s: str) -> char {
-    let end = byte_len(s);
-    while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; }
-    assert (end > 0u);
-    let ch = char_at(s, end - 1u);
-    s = substr(s, 0u, end - 1u);
-    ret ch;
+fn substr(s: str, begin: uint, len: uint) -> str {
+    ret slice(s, begin, begin + len);
 }
 
 /*
-Function: shift_char
+Function: char_slice
 
-Remove the first character from a string and return it.
+Unicode-safe slice. Returns a slice of the given string containing
+the characters in the range [`begin`..`end`). `begin` and `end` are
+character indexes, not byte indexes.
 
 Failure:
 
-If the string does not contain any characters.
+- If begin is greater than end
+- If end is greater than the character length of the string
+
+FIXME: rename to slice(), make faster by avoiding char conversion
 */
-fn shift_char(&s: str) -> char {
-    let r = char_range_at(s, 0u);
-    s = substr(s, r.next, byte_len(s) - r.next);
-    ret r.ch;
-}
-
-/*
-Function: unshift_char
-
-Prepend a char to a string
-*/
-fn unshift_char(&s: str, ch: char) { s = from_char(ch) + s; }
-
-/*
-Function: index
-
-Returns the index of the first matching byte. Returns -1 if
-no match is found.
-*/
-fn index(s: str, c: u8) -> int {
-    let i: int = 0;
-    for k: u8 in s { if k == c { ret i; } i += 1; }
-    ret -1;
-}
-
-/*
-Function: rindex
-
-Returns the index of the last matching byte. Returns -1
-if no match is found.
-*/
-fn rindex(s: str, c: u8) -> int {
-    let n: int = byte_len(s) as int;
-    while n >= 0 { if s[n] == c { ret n; } n -= 1; }
-    ret n;
-}
-
-/*
-Function: find
-
-Finds the index of the first matching substring.
-Returns -1 if `haystack` does not contain `needle`.
-
-Parameters:
-
-haystack - The string to look in
-needle - The string to look for
-
-Returns:
-
-The index of the first occurance of `needle`, or -1 if not found.
-*/
-fn find(haystack: str, needle: str) -> int {
-    let haystack_len: int = byte_len(haystack) as int;
-    let needle_len: int = byte_len(needle) as int;
-    if needle_len == 0 { ret 0; }
-    fn match_at(haystack: str, needle: str, i: int) -> bool {
-        let j: int = i;
-        for c: u8 in needle { if haystack[j] != c { ret false; } j += 1; }
-        ret true;
-    }
-    let i: int = 0;
-    while i <= haystack_len - needle_len {
-        if match_at(haystack, needle, i) { ret i; }
-        i += 1;
-    }
-    ret -1;
-}
-
-/*
-Function: contains
-
-Returns true if one string contains another
-
-Parameters:
-
-haystack - The string to look in
-needle - The string to look for
-*/
-fn contains(haystack: str, needle: str) -> bool {
-    0 <= find(haystack, needle)
-}
-
-/*
-Function: starts_with
-
-Returns true if one string starts with another
-
-Parameters:
-
-haystack - The string to look in
-needle - The string to look for
-*/
-fn starts_with(haystack: str, needle: str) -> bool {
-    let haystack_len: uint = byte_len(haystack);
-    let needle_len: uint = byte_len(needle);
-    if needle_len == 0u { ret true; }
-    if needle_len > haystack_len { ret false; }
-    ret eq(substr(haystack, 0u, needle_len), needle);
-}
-
-/*
-Function: ends_with
-
-Returns true if one string ends with another
-
-haystack - The string to look in
-needle - The string to look for
-*/
-fn ends_with(haystack: str, needle: str) -> bool {
-    let haystack_len: uint = byte_len(haystack);
-    let needle_len: uint = byte_len(needle);
-    ret if needle_len == 0u {
-            true
-        } else if needle_len > haystack_len {
-            false
-        } else {
-            eq(substr(haystack, haystack_len - needle_len, needle_len),
-               needle)
-        };
-}
-
-/*
-Function: substr
-
-Take a substring of another. Returns a string containing `len` bytes
-starting at byte offset `begin`.
-
-This function is not unicode-safe.
-
-Failure:
-
-If `begin` + `len` is is greater than the byte length of the string
-*/
-fn substr(s: str, begin: uint, len: uint) -> str {
-    ret slice(s, begin, begin + len);
+fn char_slice(s: str, begin: uint, end: uint) -> str {
+    from_chars(vec::slice(to_chars(s), begin, end))
 }
 
 /*
@@ -693,6 +476,8 @@ Failure:
 
 - If begin is greater than end.
 - If end is greater than the length of the string.
+
+FIXME: rename to slice_byte or slice_byte_unsafe
 */
 fn slice(s: str, begin: uint, end: uint) -> str unsafe {
     // FIXME: Typestate precondition
@@ -710,6 +495,10 @@ fn slice(s: str, begin: uint, end: uint) -> str unsafe {
 
 /*
 Function: safe_slice
+
+FIXME: make sure char_slice / slice / byte_slice
+       have these preconditions and assertions
+FIXME: this shouldn't be mistaken for a UTF-8 safe slice
 */
 fn safe_slice(s: str, begin: uint, end: uint) : uint::le(begin, end) -> str {
     // would need some magic to make this a precondition
@@ -717,56 +506,6 @@ fn safe_slice(s: str, begin: uint, end: uint) : uint::le(begin, end) -> str {
     ret slice(s, begin, end);
 }
 
-/*
-Function: shift_byte
-
-Removes the first byte from a string and returns it.
-
-This function is not unicode-safe.
-*/
-fn shift_byte(&s: str) -> u8 {
-    let len = byte_len(s);
-    assert (len > 0u);
-    let b = s[0];
-    s = substr(s, 1u, len - 1u);
-    ret b;
-}
-
-/*
-Function: pop_byte
-
-Removes the last byte from a string and returns it.
-
-This function is not unicode-safe.
-*/
-fn pop_byte(&s: str) -> u8 {
-    let len = byte_len(s);
-    assert (len > 0u);
-    let b = s[len - 1u];
-    s = substr(s, 0u, len - 1u);
-    ret b;
-}
-
-/*
-Function: push_byte
-
-Appends a byte to a string.
-
-This function is not unicode-safe.
-*/
-fn push_byte(&s: str, b: u8) { rustrt::rust_str_push(s, b); }
-
-/*
-Function: push_bytes
-
-Appends a vector of bytes to a string.
-
-This function is not unicode-safe.
-*/
-fn push_bytes(&s: str, bytes: [u8]) {
-    for byte in bytes { rustrt::rust_str_push(s, byte); }
-}
-
 /*
 Function: split
 
@@ -801,6 +540,8 @@ Split a string at each occurance of a given separator up to count times.
 Returns:
 
 A vector containing all the strings between each occurance of the separator
+
+FIXME: rename to 'splitn_char'
 */
 fn splitn(s: str, sep: u8, count: uint) -> [str] {
     let v = [];
@@ -864,7 +605,7 @@ Function: split_func
 Splits a string into substrings using a function
 (unicode safe)
 
-FIXME: will be renamed to split.
+FIXME: rename to 'split'
 */
 fn split_func(ss: str, sepfn: fn(cc: char)->bool) -> [str] {
     let vv: [str] = [];
@@ -929,53 +670,32 @@ fn words(ss: str) -> [str] {
 }
 
 /*
-Function: words_iter
-
-Apply a function to each word
-*/
-fn words_iter(ss: str, ff: fn(&&str)) {
-    vec::iter(words(ss), ff)
-}
-
-/*
-Function: lines_iter
-
-Apply a function to each lines (by '\n')
-*/
-fn lines_iter(ss: str, ff: fn(&&str)) {
-    vec::iter(lines(ss), ff)
-}
-
-/*
-Function: concat
+Function: windowed
 
-Concatenate a vector of strings
+Create a vector of substrings of size `nn`
 */
-fn concat(v: [str]) -> str {
-    let s: str = "";
-    for ss: str in v { s += ss; }
-    ret s;
-}
+fn windowed(nn: uint, ss: str) -> [str] {
+    let ww = [];
+    let len = str::char_len(ss);
 
-/*
-Function: connect
+    assert 1u <= nn;
 
-Concatenate a vector of strings, placing a given separator between each
-*/
-fn connect(v: [str], sep: str) -> str {
-    let s: str = "";
-    let first: bool = true;
-    for ss: str in v {
-        if first { first = false; } else { s += sep; }
-        s += ss;
+    let ii = 0u;
+    while ii+nn <= len {
+        let w = char_slice( ss, ii, ii+nn );
+        vec::push(ww,w);
+        ii += 1u;
     }
-    ret s;
+
+    ret ww;
 }
 
 /*
 Function: to_lower
 
 Convert a string to lowercase
+
+FIXME: rewrite with map
 */
 fn to_lower(s: str) -> str {
     let outstr = "";
@@ -984,10 +704,13 @@ fn to_lower(s: str) -> str {
     }
     ret outstr;
 }
+
 /*
 Function: to_upper
 
 Convert a string to uppercase
+
+FIXME: rewrite with map
 */
 fn to_upper(s: str) -> str {
     let outstr = "";
@@ -1031,145 +754,72 @@ fn replace(s: str, from: str, to: str) : is_not_empty(from) -> str {
     }
 }
 
-// FIXME: Also not efficient
 /*
-Function: char_slice
-
-Unicode-safe slice. Returns a slice of the given string containing
-the characters in the range [`begin`..`end`). `begin` and `end` are
-character indexes, not byte indexes.
-
-Failure:
+Function: escape
 
-- If begin is greater than end
-- If end is greater than the character length of the string
+Escapes special characters inside the string, making it safe for transfer.
 */
-fn char_slice(s: str, begin: uint, end: uint) -> str {
-    from_chars(vec::slice(to_chars(s), begin, end))
+fn escape(s: str) -> str {
+    let r = "";
+    loop_chars(s, { |c| r += escape_char(c); true });
+    r
 }
 
 /*
-Function: trim_left
-
-Returns a string with leading whitespace removed.
+Section: Comparing strings
 */
-fn trim_left(s: str) -> str {
-    fn count_whities(s: [char]) -> uint {
-        let i = 0u;
-        while i < vec::len(s) {
-            if !char::is_whitespace(s[i]) { break; }
-            i += 1u;
-        }
-        ret i;
-    }
-    let chars = to_chars(s);
-    let whities = count_whities(chars);
-    ret from_chars(vec::slice(chars, whities, vec::len(chars)));
-}
 
 /*
-Function: trim_right
+Function: eq
 
-Returns a string with trailing whitespace removed.
+Bytewise string equality
 */
-fn trim_right(s: str) -> str {
-    fn count_whities(s: [char]) -> uint {
-        let i = vec::len(s);
-        while 0u < i {
-            if !char::is_whitespace(s[i - 1u]) { break; }
-            i -= 1u;
-        }
-        ret i;
-    }
-    let chars = to_chars(s);
-    let whities = count_whities(chars);
-    ret from_chars(vec::slice(chars, 0u, whities));
-}
+pure fn eq(&&a: str, &&b: str) -> bool { a == b }
 
 /*
-Function: trim
+Function: lteq
 
-Returns a string with leading and trailing whitespace removed
+Bytewise less than or equal
 */
-fn trim(s: str) -> str { trim_left(trim_right(s)) }
+pure fn lteq(&&a: str, &&b: str) -> bool { a <= b }
 
 /*
-Type: sbuf
+Function: hash
 
-An unsafe buffer of bytes. Corresponds to a C char pointer.
+String hash function
 */
-type sbuf = *u8;
+fn hash(&&s: str) -> uint {
+    // djb hash.
+    // FIXME: replace with murmur.
 
-// NB: This is intentionally unexported because it's easy to misuse (there's
-// no guarantee that the string is rooted). Instead, use as_buf below.
-unsafe fn buf(s: str) -> sbuf {
-    let saddr = ptr::addr_of(s);
-    let vaddr: *[u8] = unsafe::reinterpret_cast(saddr);
-    let buf = vec::to_ptr(*vaddr);
-    ret buf;
+    let u: uint = 5381u;
+    for c: u8 in s { u *= 33u; u += c as uint; }
+    ret u;
 }
 
 /*
-Function: as_buf
-
-Work with the byte buffer of a string. Allows for unsafe manipulation
-of strings, which is useful for native interop.
-
-Example:
-
-> let s = str::as_buf("PATH", { |path_buf| libc::getenv(path_buf) });
-
+Section: Iterating through strings
 */
-fn as_buf<T>(s: str, f: fn(sbuf) -> T) -> T unsafe {
-    let buf = buf(s); f(buf)
-}
 
 /*
-Function: from_cstr
+Function: loop_chars
 
-Create a Rust string from a null-terminated C string
-*/
-unsafe fn from_cstr(cstr: sbuf) -> str {
-    let res = "";
-    let start = cstr;
-    let curr = start;
-    let i = 0u;
-    while *curr != 0u8 {
-        push_byte(res, *curr);
-        i += 1u;
-        curr = ptr::offset(start, i);
-    }
-    ret res;
-}
+Loop through a string, char by char
 
-/*
-Function: escape_char
+Parameters:
+s  - A string to traverse. It may be empty.
+it - A block to execute with each consecutive character of `s`.
+Return `true` to continue, `false` to stop.
 
-Escapes a single character.
-*/
-fn escape_char(c: char) -> str {
-    alt c {
-      '"' { "\\\"" }
-      '\\' { "\\\\" }
-      '\n' { "\\n" }
-      '\t' { "\\t" }
-      '\r' { "\\r" }
-      // FIXME: uncomment this when extfmt is moved to core
-      // in a snapshot.
-      // '\x00' to '\x1f' { #fmt["\\x%02x", c as uint] }
-      v { from_char(c) }
-    }
-}
+Returns:
 
-/*
-Function: escape
+`true` if execution proceeded correctly, `false` if it was interrupted,
+that is, if `it` returned `false` at any point.
 
-Escapes special characters inside the string, making it safe for transfer.
-*/
-fn escape(s: str) -> str {
-    let r = "";
-    loop_chars(s, { |c| r += escape_char(c); true });
-    r
+FIXME: rename to 'chars_loop' (change? currently a synonym for 'all')
+ */
+fn loop_chars(s: str, it: fn(char) -> bool) -> bool{
+    ret loop_chars_sub(s, 0u, byte_len(s), it);
 }
 
 /*
@@ -1210,32 +860,542 @@ fn map(ss: str, ff: fn(char) -> char) -> str {
 }
 
 /*
-Function: windowed
+Function: bytes_iter
 
-Create a vector of substrings of size `nn`
+Iterate over the bytes in a string
+
+FIXME: Should it really include the last byte '\0'?
 */
-fn windowed(nn: uint, ss: str) -> [str] {
-    let ww = [];
-    let len = str::char_len(ss);
+fn bytes_iter(ss: str, it: fn(u8)) {
+    let idx = 0u;
+    let stop = byte_len(ss); // byte_len leaves out the trailing '\0'
 
-    assert 1u <= nn;
+    while idx < stop {
+        it(ss[idx]);
+        idx += 1u;
+    }
+}
 
-    let ii = 0u;
-    while ii+nn <= len {
-        let w = char_slice( ss, ii, ii+nn );
-        vec::push(ww,w);
-        ii += 1u;
+/*
+Function: iter_chars
+
+Iterate over the characters in a string
+
+FIXME: rename to 'chars_iter'
+*/
+fn iter_chars(s: str, it: fn(char)) {
+    let i = 0u, stop = byte_len(s);
+    while i < stop {
+        let {ch, next} = char_range_at(s, i);
+        it(ch);
+        i = next; // byte offset of the following character
     }
+}
 
-    ret ww;
+/*
+Function: chars_iter
+
+Call `it` once for each character of `ss`, from first to last
+
+FIXME: A synonym for iter_chars (it simply forwards to it)
+*/
+fn chars_iter(ss: str, it: fn(char)) {
+    iter_chars(ss, it)
+}
+
+/*
+Function: words_iter
+
+Apply `ff` to each word of `ss`, as split by `words`
+*/
+fn words_iter(ss: str, ff: fn(&&str)) {
+    vec::iter(words(ss), ff)
+}
+
+/*
+Function: lines_iter
+
+Apply `ff` to each line of `ss`, as split by `lines` (on '\n')
+*/
+fn lines_iter(ss: str, ff: fn(&&str)) {
+    vec::iter(lines(ss), ff)
+}
+
+// FIXME: ADD split_char_iter
+// FIXME: ADD splitn_char_iter
+
+/*
+Section: Searching
+*/
+
+/*
+Function: index
+
+Returns the byte offset of the first byte of `s` equal to `c`,
+or -1 if there is none.
+*/
+fn index(s: str, c: u8) -> int {
+    let pos: int = 0;
+    for byte: u8 in s { if byte == c { ret pos; } pos += 1; }
+    ret -1;
+}
+
+/*
+Function: rindex
+
+Returns the index of the last matching byte. Returns -1
+if no match is found.
+*/
+fn rindex(s: str, c: u8) -> int {
+    let n: int = (byte_len(s) as int) - 1; // skip the trailing '\0'
+    while n >= 0 { if s[n] == c { ret n; } n -= 1; }
+    ret -1;
+}
+
+/*
+Function: find
+
+Finds the byte index of the first occurrence of one string in another.
+An empty `needle` is found at index 0.
+
+Parameters:
+
+haystack - The string to look in
+needle - The string to look for
+
+Returns:
+
+The index of the first occurrence of `needle`, or -1 if not found.
+*/
+fn find(haystack: str, needle: str) -> int {
+    let hay_len: int = byte_len(haystack) as int;
+    let ndl_len: int = byte_len(needle) as int;
+    if ndl_len == 0 { ret 0; }
+    fn match_at(haystack: str, needle: str, start: int) -> bool {
+        let k: int = start;
+        for b: u8 in needle { if haystack[k] != b { ret false; } k += 1; }
+        ret true;
+    }
+    let pos: int = 0;
+    while pos <= hay_len - ndl_len {
+        if match_at(haystack, needle, pos) { ret pos; }
+        pos += 1;
+    }
+    ret -1;
 }
 
+/*
+Function: contains
+
+Returns true if `haystack` contains `needle` (always true if it is empty)
+
+Parameters:
+
+haystack - The string to look in
+needle - The string to look for
+*/
+fn contains(haystack: str, needle: str) -> bool {
+    0 <= find(haystack, needle)
+}
+
+/*
+Function: starts_with
+
+Returns true if one string starts with another
+
+Parameters:
+
+haystack - The string to look in
+needle - The string to look for
+*/
+fn starts_with(haystack: str, needle: str) -> bool {
+    let hlen = byte_len(haystack), nlen = byte_len(needle);
+    // An empty needle is a prefix of every string
+    if nlen == 0u { ret true; }
+    if nlen > hlen { ret false; }
+    ret eq(substr(haystack, 0u, nlen), needle);
+}
+
+/*
+Function: ends_with
+
+Returns true if one string ends with another
+
+Parameters:
+
+haystack - The string to look in
+needle - The string to look for
+*/
+fn ends_with(haystack: str, needle: str) -> bool {
+    let hlen: uint = byte_len(haystack);
+    let nlen: uint = byte_len(needle);
+    // An empty needle is a suffix of every string
+    if nlen == 0u { ret true; }
+    // A needle longer than the haystack can never match
+    if nlen > hlen { ret false; }
+    // Compare the tail of the haystack with the needle
+    ret eq(substr(haystack, hlen - nlen, nlen), needle);
+}
+
+/*
+Section: String properties
+*/
+
+/*
+Function: is_ascii
+
+Determines if a string contains only ASCII characters
+
+FIXME: possibly implement using char::is_ascii when it exists
+*/
+fn is_ascii(s: str) -> bool {
+    // A byte with its high bit set is not an ASCII character
+    for b: u8 in s { if b & 128u8 != 0u8 { ret false; } }
+    ret true;
+}
+
+/*
+Predicate: is_empty
+
+Returns true if the string contains no bytes
+*/
+pure fn is_empty(s: str) -> bool { byte_len(s) == 0u }
+
+/*
+Predicate: is_not_empty
+
+Returns true if the string has length greater than 0 (negates is_empty)
+*/
+pure fn is_not_empty(s: str) -> bool { !is_empty(s) }
+
+/*
+Function: is_whitespace
+
+Returns true if every character is whitespace (true for an empty string)
+*/
+fn is_whitespace(s: str) -> bool {
+    ret loop_chars(s, char::is_whitespace);
+}
+
+/*
+Function: byte_len
+
+Returns the length in bytes of a string, not counting its null terminator
+
+FIXME: rename to 'len_bytes'?
+*/
+pure fn byte_len(s: str) -> uint unsafe {
+    let v: [u8] = unsafe::reinterpret_cast(s); // view the string as bytes
+    let vlen = vec::len(v);
+    unsafe::leak(v); // presumably so `v` does not free s's buffer - confirm
+    // There should always be a null terminator, so the vector is never empty
+    assert (vlen > 0u);
+    ret vlen - 1u;
+}
+
+/*
+Function: char_len
+
+Count the number of Unicode characters in the whole of a string
+
+FIXME: rename to 'len_chars'?
+*/
+fn char_len(s: str) -> uint {
+    ret char_len_range(s, 0u, byte_len(s));
+}
+
+/*
+Section: Misc
+*/
+
+/*
+Function: is_utf8
+
+Determines if a vector of bytes contains valid UTF-8
+*/
+fn is_utf8(v: [u8]) -> bool {
+    let pos = 0u, n = vec::len::<u8>(v);
+    while pos < n {
+        let width = utf8_char_width(v[pos]);
+        // Reject an invalid start byte or a sequence cut off by the end
+        if width == 0u || pos + width > n { ret false; }
+        let stop = pos + width;
+        pos += 1u;
+        // Every remaining byte of the sequence must be a continuation byte
+        while pos < stop {
+            if v[pos] & 192u8 != tag_cont_u8 { ret false; }
+            pos += 1u;
+        }
+    }
+    ret true;
+}
+
+/*
+Function: char_len_range
+
+As char_len but for a slice of a string
+
+Parameters:
+ s           - A valid string
+ byte_start  - The position inside `s` where to start counting in bytes.
+ byte_len    - The number of bytes of `s` to take into account.
+
+Returns:
+ The number of Unicode characters in `s` in the byte
+ segment [byte_start, byte_start + byte_len).
+
+Safety note:
+- This function does not check whether the substring is valid.
+- This function fails if `byte_start` or `byte_len` do not
+ represent valid positions inside `s`
+
+FIXME: rename to 'substr_len_chars'
+*/
+fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint {
+    let i = byte_start, len = 0u;
+    let byte_stop = byte_start + byte_len; // exclusive end offset
+    while i < byte_stop {
+        let chsize = utf8_char_width(s[i]);
+        assert (chsize > 0u);
+        len += 1u;
+        i += chsize;
+    }
+    assert (i == byte_stop);
+    ret len;
+}
+
+/*
+Function: byte_len_range
+
+As byte_len but for a substring
+
+Parameters:
+s - A string
+byte_offset - The byte offset at which to start in the string
+char_len    - The number of chars (not bytes!) in the range
+
+Returns:
+The number of bytes in the substring starting at `byte_offset` and
+containing `char_len` chars.
+
+Safety note:
+
+This function fails if `byte_offset` or `char_len` do not represent
+valid positions in `s`
+
+FIXME: rename to 'substr_len_bytes'
+*/
+fn byte_len_range(s: str, byte_offset: uint, char_len: uint) -> uint {
+    let pos = byte_offset, remaining = char_len;
+    // Step over one whole character per iteration
+    while remaining > 0u {
+        let width = utf8_char_width(s[pos]);
+        assert (width > 0u);
+        pos += width;
+        remaining -= 1u;
+    }
+    ret pos - byte_offset;
+}
+
+/*
+Function: utf8_char_width
+
+Given a first byte, determine how many bytes are in this UTF-8 character
+
+*/
+pure fn utf8_char_width(b: u8) -> uint {
+    let first: uint = b as uint;
+    // The number of leading one bits in the first byte gives the
+    // sequence length; each threshold below is the smallest byte
+    // value that carries the next longer tag.
+    ret if first < 128u { 1u }    // 0xxxxxxx
+    else if first < 192u { 0u }   // 10xxxxxx: not a valid start byte
+    else if first < 224u { 2u }   // 110xxxxx
+    else if first < 240u { 3u }   // 1110xxxx
+    else if first < 248u { 4u }   // 11110xxx
+    else if first < 252u { 5u }   // 111110xx
+    else { 6u };                  // 1111110x, and also 0xFE and 0xFF
+}
+
+/*
+Function: char_range_at
+
+Pluck a character out of a string and return the index of the next character.
+This function can be used to iterate over the unicode characters of a string.
+
+Example:
+> let s = "中华Việt Nam";
+> let i = 0u;
+> while i < str::byte_len(s) {
+>    let {ch, next} = str::char_range_at(s, i);
+>    std::io::println(#fmt("%u: %c",i,ch));
+>    i = next;
+> }
+
+Example output:
+
+      0: 中
+      3: 华
+      6: V
+      7: i
+      8: ệ
+      11: t
+      12:
+      13: N
+      14: a
+      15: m
+
+Parameters:
+
+s - The string
+i - The byte offset of the char to extract
+
+Returns:
+
+A record {ch: char, next: uint} containing the char value and the byte
+index of the next unicode character.
+
+Failure:
+
+If `i` is greater than or equal to the length of the string.
+If `i` is not the index of the beginning of a valid UTF-8 character.
+*/
+fn char_range_at(s: str, i: uint) -> {ch: char, next: uint} {
+    let b0 = s[i];
+    let w = utf8_char_width(b0);
+    assert (w != 0u);
+    if w == 1u { ret {ch: b0 as char, next: i + 1u}; }
+    let val = 0u;
+    let end = i + w;
+    let i = i + 1u;
+    while i < end {
+        let byte = s[i];
+        assert (byte & 192u8 == tag_cont_u8);
+        val <<= 6u;
+        val += byte & 63u8 as uint;
+        i += 1u;
+    }
+    // Add the payload bits of the first byte using two shifts: the first
+    // clips the w + 1 marker bits off the left end of the byte, the second
+    // (after the cast to uint) moves the rest above the continuation bits.
+    val += (b0 << (w + 1u as u8) as uint) << ((w - 1u) * 6u - w - 1u);
+    ret {ch: val as char, next: i};
+}
+
+/*
+Function: char_at
+
+Pluck the character that starts at byte offset `i` out of a string
+*/
+fn char_at(s: str, i: uint) -> char { ret char_range_at(s, i).ch; }
+
+/*
+Function: loop_chars_sub
+
+Loop through a substring, char by char
+
+Parameters:
+s           - A string to traverse. It may be empty.
+byte_offset - The byte offset at which to start in the string.
+byte_len    - The number of bytes to traverse in the string
+it          - A block to execute with each consecutive character of `s`.
+Return `true` to continue, `false` to stop.
+
+Returns:
+
+`true` If execution proceeded correctly, `false` if it was interrupted,
+that is if `it` returned `false` at any point.
+
+Safety note:
+- This function does not check whether the substring is valid.
+- This function fails if `byte_offset` or `byte_len` do not
+ represent valid positions inside `s`
+
+FIXME: rename to 'substr_all'
+ */
+fn loop_chars_sub(s: str, byte_offset: uint, byte_len: uint,
+              it: fn(char) -> bool) -> bool {
+   let i = byte_offset;
+   let byte_stop = byte_offset + byte_len; // exclusive end offset
+   while i < byte_stop {
+      let {ch, next} = char_range_at(s, i);
+      if !it(ch) { ret false; } // interrupted by the callback
+      i = next;
+   }
+   ret true;
+}
+
+
+/*
+Function: escape_char
+
+Escapes a single character.
+*/
+fn escape_char(c: char) -> str {
+    alt c {
+      '\n' { "\\n" }
+      '\r' { "\\r" }
+      '\t' { "\\t" }
+      '\\' { "\\\\" }
+      '"' { "\\\"" }
+      // FIXME: uncomment this when extfmt is moved to core
+      // in a snapshot.
+      // '\x00' to '\x1f' { #fmt["\\x%02x", c as uint] }
+      _ { from_char(c) }
+    }
+}
+
+// UTF-8 tags and ranges
+const tag_cont_u8: u8 = 128u8;      // 0b10000000
+const tag_cont: uint = 128u;        // 0b10000000
+const max_one_b: uint = 128u;       // 2^7
+const tag_two_b: uint = 192u;       // 0b11000000
+const max_two_b: uint = 2048u;      // 2^11
+const tag_three_b: uint = 224u;     // 0b11100000
+const max_three_b: uint = 65536u;   // 2^16
+const tag_four_b: uint = 240u;      // 0b11110000
+const max_four_b: uint = 2097152u;  // 2^21
+const tag_five_b: uint = 248u;      // 0b11111000
+const max_five_b: uint = 67108864u; // 2^26
+const tag_six_b: uint = 252u;       // 0b11111100
+
+// NB: This is intentionally unexported because it's easy to misuse (there's
+// no guarantee that the string is rooted). Instead, use as_buf below.
+unsafe fn buf(s: str) -> sbuf {
+    let saddr = ptr::addr_of(s);
+    let vaddr: *[u8] = unsafe::reinterpret_cast(saddr); // view as *[u8]
+    let buf = vec::to_ptr(*vaddr); // presumably the first byte's address
+    ret buf;
+}
+
+/*
+Function: as_buf
+
+Work with the byte buffer of a string. Allows for unsafe manipulation
+of strings, which is useful for native interop.
+
+Example:
+
+> let s = str::as_buf("PATH", { |path_buf| libc::getenv(path_buf) });
+
+*/
+fn as_buf<T>(s: str, f: fn(sbuf) -> T) -> T unsafe {
+    f(buf(s)) // `s` stays alive for the duration of the call to `f`
+}
+
+/*
+Type: sbuf
+
+An unsafe buffer of bytes. Corresponds to a C char pointer.
+*/
+type sbuf = *u8;
+
+
 #[cfg(test)]
 mod tests {
 
     #[test]
-    fn test_eq() {
-        assert (eq("", ""));
+    fn test_eq() { assert (eq("", ""));
         assert (eq("foo", "foo"));
         assert (!eq("foo", "bar"));
     }

From 3c81aa9c2b86d7d5f9bf498c8835c1f7386ecddb Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Tue, 24 Jan 2012 01:34:18 -0800
Subject: [PATCH 2/2] fix a typo

---
 src/libcore/str.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 2d4d1a7419d64..823f7cc78c6b2 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -1395,7 +1395,8 @@ type sbuf = *u8;
 mod tests {
 
     #[test]
-    fn test_eq() { assert (eq("", ""));
+    fn test_eq() {
+        assert (eq("", ""));
         assert (eq("foo", "foo"));
         assert (!eq("foo", "bar"));
     }