diff --git a/regex_macros/src/lib.rs b/regex_macros/src/lib.rs index 5c6c32ac1f..27ff27a43d 100644 --- a/regex_macros/src/lib.rs +++ b/regex_macros/src/lib.rs @@ -494,7 +494,7 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str, } else { quote_expr!(self.cx, found) }; - let mranges = self.match_class(casei, &ranges); + let mranges = self.match_class(&ranges); quote_expr!(self.cx, { if self.chars.prev.is_some() { let c = $get_char; @@ -529,12 +529,8 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str, // Translates a character class into a match expression. // This avoids a binary search (and is hopefully replaced by a jump // table). - fn match_class(&self, casei: bool, ranges: &[(char, char)]) -> P { - let mut arms = ranges.iter().map(|&(mut start, mut end)| { - if casei { - start = start.to_uppercase().next().unwrap(); - end = end.to_uppercase().next().unwrap(); - } + fn match_class(&self, ranges: &[(char, char)]) -> P { + let mut arms = ranges.iter().map(|&(start, end)| { let pat = self.cx.pat(self.sp, ast::PatRange(quote_expr!(self.cx, $start), quote_expr!(self.cx, $end))); self.cx.arm(self.sp, vec!(pat), quote_expr!(self.cx, true)) diff --git a/regex_macros/tests/tests.rs b/regex_macros/tests/tests.rs index fed0b3df05..7dbde95463 100644 --- a/regex_macros/tests/tests.rs +++ b/regex_macros/tests/tests.rs @@ -355,6 +355,10 @@ mat!(negclass_space_comma, r"[^,\s]", ", a", Some((2, 3))); mat!(negclass_comma_space, r"[^\s,]", " ,a", Some((2, 3))); mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2))); +// Regression test for https://github.com/rust-lang/regex/issues/75 +mat!(regression_unsorted_binary_search_1, r"(?i)[a_]+", "A_", Some((0, 2))); +mat!(regression_unsorted_binary_search_2, r"(?i)[A_]+", "a_", Some((0, 2))); + // A whole mess of tests from Glenn Fowler's regex test suite. // Generated by the 'src/etc/regex-match-tests' program. 
#[path = "matches.rs"] diff --git a/src/parse.rs b/src/parse.rs index 7eb090ef44..3e14bcb34a 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -421,6 +421,13 @@ impl Parser { } } ']' if ranges.len() > 0 => { + if self.flags & FLAG_NOCASE > 0 { + // FIXME(https://github.com/rust-lang/regex/issues/76): This is wrong. + for range in &mut ranges { + range.0 = range.0.to_uppercase().next().unwrap(); + range.1 = range.1.to_uppercase().next().unwrap(); + } + } ranges = combine_ranges(ranges); if negated { ranges = invert_ranges(ranges); diff --git a/src/vm.rs b/src/vm.rs index 84af229279..10c8bae268 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -229,12 +229,12 @@ impl<'r, 't> Nfa<'r, 't> { } } CharClass(ref ranges, flags) => { - if let Some(c) = self.chars.prev { + if let Some(mut c) = self.chars.prev { let negate = flags & FLAG_NEGATED > 0; - let casei = flags & FLAG_NOCASE > 0; - let found = - ranges.binary_search_by(|&rc| class_cmp(casei, c, rc)) - .is_ok(); + if flags & FLAG_NOCASE > 0 { + c = c.to_uppercase().next().unwrap(); + } + let found = ranges.binary_search_by(|&rc| class_cmp(c, rc)).is_ok(); if found ^ negate { self.add(nlist, pc+1, caps); } @@ -540,26 +540,9 @@ pub fn is_word(c: Option) -> bool { /// indicating whether the character is less than the start of the range, /// in the range (inclusive) or greater than the end of the range. /// -/// If `casei` is `true`, then this ordering is computed case insensitively. -/// /// This function is meant to be used with a binary search. #[inline] -fn class_cmp(casei: bool, mut textc: char, - (mut start, mut end): (char, char)) -> Ordering { - if casei { - // FIXME: This is pretty ridiculous. All of this case conversion - // can be moved outside this function: - // 1) textc should be uppercased outside the bsearch. - // 2) the character class itself should be uppercased either in the - // parser or the compiler. - // FIXME: This is too simplistic for correct Unicode support.
- // See also: char_eq - // FIXME: Standard library now yields iterators, so we should take - // advantage of them. - textc = textc.to_uppercase().next().unwrap(); - start = start.to_uppercase().next().unwrap(); - end = end.to_uppercase().next().unwrap(); - } +fn class_cmp(textc: char, (start, end): (char, char)) -> Ordering { if textc >= start && textc <= end { Ordering::Equal } else if start > textc {