diff --git a/src/re_bytes.rs b/src/re_bytes.rs
index d71969257..9e10efbae 100644
--- a/src/re_bytes.rs
+++ b/src/re_bytes.rs
@@ -318,6 +318,35 @@ impl Regex {
         Split { finder: self.find_iter(text), last: 0 }
     }
 
+    /// Returns an iterator of substrings of `text` separated by a match of the
+    /// regular expression. Unlike `split`, each substring keeps the match that
+    /// terminates it, and a match at the very end of `text` is not followed by
+    /// an empty substring.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use regex::bytes::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"\r?\n").unwrap();
+    /// let text = b"Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
+    /// let v: Vec<&[u8]> = re.split_inclusive(text).collect();
+    /// assert_eq!(v, [
+    ///     &b"Mary had a little lamb\n"[..],
+    ///     &b"little lamb\r\n"[..],
+    ///     &b"little lamb."[..]
+    /// ]);
+    /// # }
+    /// ```
+    pub fn split_inclusive<'r, 't>(
+        &'r self,
+        text: &'t [u8],
+    ) -> SplitInclusive<'r, 't> {
+        SplitInclusive { finder: self.find_iter(text), last: 0 }
+    }
+
     /// Returns an iterator of at most `limit` substrings of `text` delimited
     /// by a match of the regular expression. (A `limit` of `0` will return no
     /// substrings.) Namely, each element of the iterator corresponds to text
@@ -767,6 +796,43 @@ impl<'r, 't> Iterator for Split<'r, 't> {
 
 impl<'r, 't> FusedIterator for Split<'r, 't> {}
 
+/// Yields all substrings delimited by a regular expression match inclusive of
+/// the match. A match that ends the text is not followed by an empty item.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the byte string being split.
+#[derive(Debug)]
+pub struct SplitInclusive<'r, 't> {
+    finder: Matches<'r, 't>,
+    last: usize,
+}
+
+impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
+    type Item = &'t [u8];
+
+    fn next(&mut self) -> Option<&'t [u8]> {
+        let text = self.finder.0.text();
+        match self.finder.next() {
+            None => {
+                if self.last >= text.len() {
+                    None
+                } else {
+                    let s = &text[self.last..];
+                    self.last = text.len(); // Next call will return None
+                    Some(s)
+                }
+            }
+            Some(m) => {
+                let matched = &text[self.last..m.end()];
+                self.last = m.end();
+                Some(matched)
+            }
+        }
+    }
+}
+
+impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}
+
 /// Yields at most `N` substrings delimited by a regular expression match.
 ///
 /// The last substring will be whatever remains after splitting.
diff --git a/src/re_unicode.rs b/src/re_unicode.rs
index 60d81a7d9..667e032f5 100644
--- a/src/re_unicode.rs
+++ b/src/re_unicode.rs
@@ -371,6 +371,30 @@ impl Regex {
         Split { finder: self.find_iter(text), last: 0 }
     }
 
+    /// Returns an iterator of substrings of `text` delimited by a match of the
+    /// regular expression. Each element of the iterator ends with the match
+    /// that delimited it; only the final element may lack one.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"\r?\n").unwrap();
+    /// let text = "Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
+    /// let v: Vec<&str> = re.split_inclusive(text).collect();
+    /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\r\n", "little lamb."]);
+    /// # }
+    /// ```
+    pub fn split_inclusive<'r, 't>(
+        &'r self,
+        text: &'t str,
+    ) -> SplitInclusive<'r, 't> {
+        SplitInclusive { finder: self.find_iter(text), last: 0 }
+    }
+
     /// Returns an iterator of at most `limit` substrings of `text` delimited
     /// by a match of the regular expression. (A `limit` of `0` will return no
     /// substrings.) Namely, each element of the iterator corresponds to text
@@ -809,6 +833,43 @@ impl<'r, 't> Iterator for Split<'r, 't> {
 
 impl<'r, 't> FusedIterator for Split<'r, 't> {}
 
+/// Yields all substrings delimited by a regular expression match inclusive of
+/// the match. A match that ends the text is not followed by an empty item.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the string being split.
+#[derive(Debug)]
+pub struct SplitInclusive<'r, 't> {
+    finder: Matches<'r, 't>,
+    last: usize,
+}
+
+impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
+    type Item = &'t str;
+
+    fn next(&mut self) -> Option<&'t str> {
+        let text = self.finder.0.text();
+        match self.finder.next() {
+            None => {
+                if self.last >= text.len() {
+                    None
+                } else {
+                    let s = &text[self.last..];
+                    self.last = text.len(); // Next call will return None
+                    Some(s)
+                }
+            }
+            Some(m) => {
+                let matched = &text[self.last..m.end()];
+                self.last = m.end();
+                Some(matched)
+            }
+        }
+    }
+}
+
+impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}
+
 /// Yields at most `N` substrings delimited by a regular expression match.
 ///
 /// The last substring will be whatever remains after splitting.