Skip to content

Commit 88640aa

Browse files
committed
syntax: optimize \B{10000}
Basically, whenever a counted repetition is applied to a sub-expression that can only ever match the empty string, the repetition's bounds can be capped at 1. We can achieve that optimization very easily via the Hir::repetition smart constructor. This is somewhat important to do because otherwise one can write something like \B{10000}. The higher level infrastructure is somewhat dumb about this and will happily try to match \B over and over again. We should probably improve the higher level aspects of this (because this is not the only case that can cause the same assertions to be repeatedly evaluated at the same position), but this fixes the most obvious ones at the HIR level.
1 parent 4766835 commit 88640aa

File tree

4 files changed

+22
-7
lines changed

4 files changed

+22
-7
lines changed

regex-syntax/src/hir/literal.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2854,13 +2854,13 @@ mod tests {
28542854
// repeats.
28552855
#[test]
28562856
fn crazy_repeats() {
2857-
assert_eq!(inexact([I("")], [I("")]), e(r"(?:){4294967295}"));
2857+
assert_eq!(inexact([E("")], [E("")]), e(r"(?:){4294967295}"));
28582858
assert_eq!(
2859-
inexact([I("")], [I("")]),
2859+
inexact([E("")], [E("")]),
28602860
e(r"(?:){64}{64}{64}{64}{64}{64}")
28612861
);
2862-
assert_eq!(inexact([I("")], [I("")]), e(r"x{0}{4294967295}"));
2863-
assert_eq!(inexact([I("")], [I("")]), e(r"(?:|){4294967295}"));
2862+
assert_eq!(inexact([E("")], [E("")]), e(r"x{0}{4294967295}"));
2863+
assert_eq!(inexact([E("")], [E("")]), e(r"(?:|){4294967295}"));
28642864

28652865
assert_eq!(
28662866
inexact([E("")], [E("")]),

regex-syntax/src/hir/mod.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,13 @@ impl Hir {
355355

356356
/// Creates a repetition HIR expression.
357357
#[inline]
358-
pub fn repetition(rep: Repetition) -> Hir {
358+
pub fn repetition(mut rep: Repetition) -> Hir {
359+
// If the sub-expression of a repetition can only match the empty
360+
// string, then we force its maximum to be at most 1.
361+
if rep.sub.properties().maximum_len() == Some(0) {
362+
rep.min = cmp::min(rep.min, 1);
363+
rep.max = rep.max.map(|n| cmp::min(n, 1)).or(Some(1));
364+
}
359365
// The regex 'a{0}' is always equivalent to the empty regex. This is
360366
// true even when 'a' is an expression that never matches anything
361367
// (like '\P{any}').

regex-syntax/src/hir/print.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ mod tests {
503503
}),
504504
Hir::look(hir::Look::End),
505505
]);
506-
assert_eq!(r"(?:\A(?:\A\z)+\z)", expr.to_string());
506+
assert_eq!(r"(?:\A\A\z\z)", expr.to_string());
507507
}
508508

509509
// Just like regression_repetition_concat, but with the repetition using
@@ -540,7 +540,7 @@ mod tests {
540540
}),
541541
Hir::look(hir::Look::End),
542542
]);
543-
assert_eq!(r"(?:\A(?:\A|\z)+\z)", expr.to_string());
543+
assert_eq!(r"(?:\A(?:\A|\z)\z)", expr.to_string());
544544
}
545545

546546
// This regression test is very similar in flavor to

regex-syntax/src/hir/translate.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3489,6 +3489,15 @@ mod tests {
34893489
assert!(!props(r"(?:z|xx)@|xx").is_alternation_literal());
34903490
}
34913491

3492+
// This tests that the smart Hir::repetition constructor does some basic
3493+
// simplifications.
3494+
#[test]
3495+
fn smart_repetition() {
3496+
assert_eq!(t(r"a{0}"), Hir::empty());
3497+
assert_eq!(t(r"a{1}"), hir_lit("a"));
3498+
assert_eq!(t(r"\B{32111}"), hir_look(hir::Look::WordUnicodeNegate));
3499+
}
3500+
34923501
// This tests that the smart Hir::concat constructor simplifies the given
34933502
// exprs in a way we expect.
34943503
#[test]

0 commit comments

Comments
 (0)