@@ -124,6 +124,7 @@ pub struct ParserBuilder {
124
124
ignore_whitespace : bool ,
125
125
nest_limit : u32 ,
126
126
octal : bool ,
127
+ empty_min_range : bool ,
127
128
}
128
129
129
130
impl Default for ParserBuilder {
@@ -139,6 +140,7 @@ impl ParserBuilder {
139
140
ignore_whitespace : false ,
140
141
nest_limit : 250 ,
141
142
octal : false ,
143
+ empty_min_range : false ,
142
144
}
143
145
}
144
146
@@ -149,6 +151,7 @@ impl ParserBuilder {
149
151
capture_index : Cell :: new ( 0 ) ,
150
152
nest_limit : self . nest_limit ,
151
153
octal : self . octal ,
154
+ empty_min_range : self . empty_min_range ,
152
155
initial_ignore_whitespace : self . ignore_whitespace ,
153
156
ignore_whitespace : Cell :: new ( self . ignore_whitespace ) ,
154
157
comments : RefCell :: new ( vec ! [ ] ) ,
@@ -221,6 +224,18 @@ impl ParserBuilder {
221
224
self . ignore_whitespace = yes;
222
225
self
223
226
}
227
+
228
+ /// Allow using `{,n}` as an equivalent to `{0,n}`.
229
+ ///
230
+ /// When enabled, the parser accepts `{,n}` as valid syntax for `{0,n}`.
231
+ /// Most regular expression engines don't support the `{,n}` syntax, but
232
+ /// some others do it, namely Python's `re` library.
233
+ ///
234
+ /// This is disabled by default.
235
+ pub fn empty_min_range ( & mut self , yes : bool ) -> & mut ParserBuilder {
236
+ self . empty_min_range = yes;
237
+ self
238
+ }
224
239
}
225
240
226
241
/// A regular expression parser.
@@ -246,6 +261,9 @@ pub struct Parser {
246
261
/// The initial setting for `ignore_whitespace` as provided by
247
262
/// `ParserBuilder`. It is used when resetting the parser's state.
248
263
initial_ignore_whitespace : bool ,
264
+ /// Whether the parser supports `{,n}` repetitions as an equivalent to
265
+ /// `{0,n}`.
266
+ empty_min_range : bool ,
249
267
/// Whether whitespace should be ignored. When enabled, comments are
250
268
/// also permitted.
251
269
ignore_whitespace : Cell < bool > ,
@@ -1114,32 +1132,48 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
1114
1132
self . parse_decimal ( ) ,
1115
1133
ast:: ErrorKind :: DecimalEmpty ,
1116
1134
ast:: ErrorKind :: RepetitionCountDecimalEmpty ,
1117
- ) ?;
1118
- let mut range = ast:: RepetitionRange :: Exactly ( count_start) ;
1135
+ ) ;
1119
1136
if self . is_eof ( ) {
1120
1137
return Err ( self . error (
1121
1138
Span :: new ( start, self . pos ( ) ) ,
1122
1139
ast:: ErrorKind :: RepetitionCountUnclosed ,
1123
1140
) ) ;
1124
1141
}
1125
- if self . char ( ) == ',' {
1142
+ let range = if self . char ( ) == ',' {
1126
1143
if !self . bump_and_bump_space ( ) {
1127
1144
return Err ( self . error (
1128
1145
Span :: new ( start, self . pos ( ) ) ,
1129
1146
ast:: ErrorKind :: RepetitionCountUnclosed ,
1130
1147
) ) ;
1131
1148
}
1132
1149
if self . char ( ) != '}' {
1150
+ let count_start = match count_start {
1151
+ Ok ( c) => c,
1152
+ Err ( err)
1153
+ if err. kind
1154
+ == ast:: ErrorKind :: RepetitionCountDecimalEmpty =>
1155
+ {
1156
+ if self . parser ( ) . empty_min_range {
1157
+ 0
1158
+ } else {
1159
+ return Err ( err) ;
1160
+ }
1161
+ }
1162
+ err => err?,
1163
+ } ;
1133
1164
let count_end = specialize_err (
1134
1165
self . parse_decimal ( ) ,
1135
1166
ast:: ErrorKind :: DecimalEmpty ,
1136
1167
ast:: ErrorKind :: RepetitionCountDecimalEmpty ,
1137
1168
) ?;
1138
- range = ast:: RepetitionRange :: Bounded ( count_start, count_end) ;
1169
+ ast:: RepetitionRange :: Bounded ( count_start, count_end)
1139
1170
} else {
1140
- range = ast:: RepetitionRange :: AtLeast ( count_start) ;
1171
+ ast:: RepetitionRange :: AtLeast ( count_start? )
1141
1172
}
1142
- }
1173
+ } else {
1174
+ ast:: RepetitionRange :: Exactly ( count_start?)
1175
+ } ;
1176
+
1143
1177
if self . is_eof ( ) || self . char ( ) != '}' {
1144
1178
return Err ( self . error (
1145
1179
Span :: new ( start, self . pos ( ) ) ,
@@ -2459,6 +2493,11 @@ mod tests {
2459
2493
ParserI :: new ( parser, pattern)
2460
2494
}
2461
2495
2496
+ fn parser_empty_min_range ( pattern : & str ) -> ParserI < ' _ , Parser > {
2497
+ let parser = ParserBuilder :: new ( ) . empty_min_range ( true ) . build ( ) ;
2498
+ ParserI :: new ( parser, pattern)
2499
+ }
2500
+
2462
2501
fn parser_nest_limit (
2463
2502
pattern : & str ,
2464
2503
nest_limit : u32 ,
@@ -3376,6 +3415,20 @@ bar
3376
3415
ast: Box :: new( lit( 'a' , 0 ) ) ,
3377
3416
} ) )
3378
3417
) ;
3418
+ assert_eq ! (
3419
+ parser_empty_min_range( r"a{,9}" ) . parse( ) ,
3420
+ Ok ( Ast :: repetition( ast:: Repetition {
3421
+ span: span( 0 ..5 ) ,
3422
+ op: ast:: RepetitionOp {
3423
+ span: span( 1 ..5 ) ,
3424
+ kind: ast:: RepetitionKind :: Range (
3425
+ ast:: RepetitionRange :: Bounded ( 0 , 9 )
3426
+ ) ,
3427
+ } ,
3428
+ greedy: true ,
3429
+ ast: Box :: new( lit( 'a' , 0 ) ) ,
3430
+ } ) )
3431
+ ) ;
3379
3432
assert_eq ! (
3380
3433
parser_ignore_whitespace( r"a{5,9} ?" ) . parse( ) ,
3381
3434
Ok ( Ast :: repetition( ast:: Repetition {
@@ -4596,8 +4649,8 @@ bar
4596
4649
assert_eq ! (
4597
4650
parser( r"\b{ " ) . parse( ) . unwrap_err( ) ,
4598
4651
TestError {
4599
- span: span( 4 ..4 ) ,
4600
- kind: ast:: ErrorKind :: RepetitionCountDecimalEmpty ,
4652
+ span: span( 2 ..4 ) ,
4653
+ kind: ast:: ErrorKind :: RepetitionCountUnclosed ,
4601
4654
}
4602
4655
) ;
4603
4656
// In this case, we got some valid chars that makes it look like the
0 commit comments