Skip to content

Commit 2711afe

Browse files
committed
compile: ban empty sub-expressions
With the regex syntax rewrite, we now support empty subexpressions more officially. Unfortunately, the compiler has trouble with empty subexpressions in alternation branches. There's no particular reason not to support them, but they are difficult/awkward to express with the current compiler. So just ban them for now. If one does need an empty subexpression in an alternation branch, then amusingly, something like `()?|z` will work. We could rewrite all such empty sub-expressions into `()?`, which would retain the same match semantics, but we choose to make the most conservative change possible.
1 parent 744f28b commit 2711afe

File tree

3 files changed

+38
-0
lines changed

3 files changed

+38
-0
lines changed

src/compile.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,11 +522,29 @@ impl Compiler {
522522
for e in &exprs[0..exprs.len() - 1] {
523523
self.fill_to_next(prev_hole);
524524
let split = self.push_split_hole();
525+
let prev_entry = self.insts.len();
525526
let Patch { hole, entry } = try!(self.c(e));
527+
if prev_entry == self.insts.len() {
528+
// TODO(burntsushi): It is kind of silly that we don't support
529+
// empty sub-expressions in alternates, but it is supremely
530+
// awkward to support them in the existing compiler
531+
// infrastructure. This entire compiler needs to be thrown out
532+
// anyway, so don't feel too bad.
533+
return Err(Error::Syntax(
534+
"alternations cannot currently contain \
535+
empty sub-expressions".to_string()));
536+
}
526537
holes.push(hole);
527538
prev_hole = self.fill_split(split, Some(entry), None);
528539
}
540+
let prev_entry = self.insts.len();
529541
let Patch { hole, entry } = try!(self.c(&exprs[exprs.len() - 1]));
542+
if prev_entry == self.insts.len() {
543+
// TODO(burntsushi): See TODO above.
544+
return Err(Error::Syntax(
545+
"alternations cannot currently contain \
546+
empty sub-expressions".to_string()));
547+
}
530548
holes.push(hole);
531549
self.fill(prev_hole, entry);
532550
Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })

tests/crazy.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,19 @@ mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
6161
mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
6262
mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));
6363

64+
// Test that we handle various flavors of empty expressions.
65+
matiter!(match_empty1, r"", "", (0, 0));
66+
matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
67+
matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
68+
matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
69+
matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
70+
matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
71+
matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
72+
matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
73+
matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
74+
matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
75+
matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
76+
6477
// Test that the DFA can handle pathological cases.
6578
// (This should result in the DFA's cache being flushed too frequently, which
6679
// should cause it to quit and fall back to the NFA algorithm.)

tests/noparse.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,10 @@ noparse!(fail_range_end_no_class, "[a-[:lower:]]");
4141
noparse!(fail_range_end_no_begin, r"[a-\A]");
4242
noparse!(fail_range_end_no_end, r"[a-\z]");
4343
noparse!(fail_range_end_no_boundary, r"[a-\b]");
44+
noparse!(fail_empty_alt1, r"|z");
45+
noparse!(fail_empty_alt2, r"z|");
46+
noparse!(fail_empty_alt3, r"|");
47+
noparse!(fail_empty_alt4, r"||");
48+
noparse!(fail_empty_alt5, r"()|z");
49+
noparse!(fail_empty_alt6, r"z|()");
50+
noparse!(fail_empty_alt7, r"(|)");

0 commit comments

Comments
 (0)