Skip to content

Commit f5d0b69

Browse files
authored
syntax: accept {,n} as an equivalent to {0,n}
Most regular expression engines don't accept the `{,n}` syntax, but some others do (namely Python's `re` library). This introduces a new parser configuration option that enables the `{,n}` syntax. PR #1086
1 parent aa2d8bd commit f5d0b69

File tree

1 file changed

+61
-8
lines changed

1 file changed

+61
-8
lines changed

regex-syntax/src/ast/parse.rs

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ pub struct ParserBuilder {
124124
ignore_whitespace: bool,
125125
nest_limit: u32,
126126
octal: bool,
127+
empty_min_range: bool,
127128
}
128129

129130
impl Default for ParserBuilder {
@@ -139,6 +140,7 @@ impl ParserBuilder {
139140
ignore_whitespace: false,
140141
nest_limit: 250,
141142
octal: false,
143+
empty_min_range: false,
142144
}
143145
}
144146

@@ -149,6 +151,7 @@ impl ParserBuilder {
149151
capture_index: Cell::new(0),
150152
nest_limit: self.nest_limit,
151153
octal: self.octal,
154+
empty_min_range: self.empty_min_range,
152155
initial_ignore_whitespace: self.ignore_whitespace,
153156
ignore_whitespace: Cell::new(self.ignore_whitespace),
154157
comments: RefCell::new(vec![]),
@@ -221,6 +224,18 @@ impl ParserBuilder {
221224
self.ignore_whitespace = yes;
222225
self
223226
}
227+
228+
/// Allow using `{,n}` as an equivalent to `{0,n}`.
229+
///
230+
/// When enabled, the parser accepts `{,n}` as valid syntax for `{0,n}`.
231+
/// Most regular expression engines don't support the `{,n}` syntax, but
232+
/// some others do, namely Python's `re` library.
233+
///
234+
/// This is disabled by default.
235+
pub fn empty_min_range(&mut self, yes: bool) -> &mut ParserBuilder {
236+
self.empty_min_range = yes;
237+
self
238+
}
224239
}
225240

226241
/// A regular expression parser.
@@ -246,6 +261,9 @@ pub struct Parser {
246261
/// The initial setting for `ignore_whitespace` as provided by
247262
/// `ParserBuilder`. It is used when resetting the parser's state.
248263
initial_ignore_whitespace: bool,
264+
/// Whether the parser supports `{,n}` repetitions as an equivalent to
265+
/// `{0,n}`.
266+
empty_min_range: bool,
249267
/// Whether whitespace should be ignored. When enabled, comments are
250268
/// also permitted.
251269
ignore_whitespace: Cell<bool>,
@@ -1114,32 +1132,48 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
11141132
self.parse_decimal(),
11151133
ast::ErrorKind::DecimalEmpty,
11161134
ast::ErrorKind::RepetitionCountDecimalEmpty,
1117-
)?;
1118-
let mut range = ast::RepetitionRange::Exactly(count_start);
1135+
);
11191136
if self.is_eof() {
11201137
return Err(self.error(
11211138
Span::new(start, self.pos()),
11221139
ast::ErrorKind::RepetitionCountUnclosed,
11231140
));
11241141
}
1125-
if self.char() == ',' {
1142+
let range = if self.char() == ',' {
11261143
if !self.bump_and_bump_space() {
11271144
return Err(self.error(
11281145
Span::new(start, self.pos()),
11291146
ast::ErrorKind::RepetitionCountUnclosed,
11301147
));
11311148
}
11321149
if self.char() != '}' {
1150+
let count_start = match count_start {
1151+
Ok(c) => c,
1152+
Err(err)
1153+
if err.kind
1154+
== ast::ErrorKind::RepetitionCountDecimalEmpty =>
1155+
{
1156+
if self.parser().empty_min_range {
1157+
0
1158+
} else {
1159+
return Err(err);
1160+
}
1161+
}
1162+
err => err?,
1163+
};
11331164
let count_end = specialize_err(
11341165
self.parse_decimal(),
11351166
ast::ErrorKind::DecimalEmpty,
11361167
ast::ErrorKind::RepetitionCountDecimalEmpty,
11371168
)?;
1138-
range = ast::RepetitionRange::Bounded(count_start, count_end);
1169+
ast::RepetitionRange::Bounded(count_start, count_end)
11391170
} else {
1140-
range = ast::RepetitionRange::AtLeast(count_start);
1171+
ast::RepetitionRange::AtLeast(count_start?)
11411172
}
1142-
}
1173+
} else {
1174+
ast::RepetitionRange::Exactly(count_start?)
1175+
};
1176+
11431177
if self.is_eof() || self.char() != '}' {
11441178
return Err(self.error(
11451179
Span::new(start, self.pos()),
@@ -2459,6 +2493,11 @@ mod tests {
24592493
ParserI::new(parser, pattern)
24602494
}
24612495

2496+
fn parser_empty_min_range(pattern: &str) -> ParserI<'_, Parser> {
2497+
let parser = ParserBuilder::new().empty_min_range(true).build();
2498+
ParserI::new(parser, pattern)
2499+
}
2500+
24622501
fn parser_nest_limit(
24632502
pattern: &str,
24642503
nest_limit: u32,
@@ -3376,6 +3415,20 @@ bar
33763415
ast: Box::new(lit('a', 0)),
33773416
}))
33783417
);
3418+
assert_eq!(
3419+
parser_empty_min_range(r"a{,9}").parse(),
3420+
Ok(Ast::repetition(ast::Repetition {
3421+
span: span(0..5),
3422+
op: ast::RepetitionOp {
3423+
span: span(1..5),
3424+
kind: ast::RepetitionKind::Range(
3425+
ast::RepetitionRange::Bounded(0, 9)
3426+
),
3427+
},
3428+
greedy: true,
3429+
ast: Box::new(lit('a', 0)),
3430+
}))
3431+
);
33793432
assert_eq!(
33803433
parser_ignore_whitespace(r"a{5,9} ?").parse(),
33813434
Ok(Ast::repetition(ast::Repetition {
@@ -4596,8 +4649,8 @@ bar
45964649
assert_eq!(
45974650
parser(r"\b{ ").parse().unwrap_err(),
45984651
TestError {
4599-
span: span(4..4),
4600-
kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
4652+
span: span(2..4),
4653+
kind: ast::ErrorKind::RepetitionCountUnclosed,
46014654
}
46024655
);
46034656
// In this case, we got some valid chars that make it look like the

0 commit comments

Comments
 (0)