@@ -186,6 +186,15 @@ impl std::error::Error for ParserError {}
186
186
// By default, allow expressions up to this deep before erroring
187
187
const DEFAULT_REMAINING_DEPTH: usize = 50;
188
188
189
+ // Shared EOF sentinel with an all-zero span; the reference-returning token accessors hand out `&EOF_TOKEN` when no token is left, instead of building a fresh EOF token.
190
+ const EOF_TOKEN: TokenWithSpan = TokenWithSpan {
191
+ token: Token::EOF,
192
+ span: Span {
193
+ start: Location { line: 0, column: 0 },
194
+ end: Location { line: 0, column: 0 },
195
+ },
196
+ };
197
+
189
198
/// Composite types declarations using angle brackets syntax can be arbitrary
190
199
/// nested such that the following declaration is possible:
191
200
/// `ARRAY<ARRAY<INT>>`
@@ -1236,7 +1245,7 @@ impl<'a> Parser<'a> {
1236
1245
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
1237
1246
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
1238
1247
// expression that should parse as the column name "date".
1239
- let loc = self.peek_token ().span.start;
1248
+ let loc = self.peek_token_ref ().span.start;
1240
1249
let opt_expr = self.maybe_parse(|parser| {
1241
1250
match parser.parse_data_type()? {
1242
1251
DataType::Interval => parser.parse_interval(),
@@ -1259,8 +1268,14 @@ impl<'a> Parser<'a> {
1259
1268
return Ok(expr);
1260
1269
}
1261
1270
1262
- let next_token = self.next_token();
1263
- let expr = match next_token.token {
1271
+ // Cache some dialect properties to avoid lifetime issues with the
1272
+ // next_token reference.
1273
+
1274
+ let dialect = self.dialect;
1275
+
1276
+ let next_token = self.next_token_ref();
1277
+ let span = next_token.span;
1278
+ let expr = match &next_token.token {
1264
1279
Token::Word(w) => {
1265
1280
// The word we consumed may fall into one of two cases: it has a special meaning, or not.
1266
1281
// For example, in Snowflake, the word `interval` may have two meanings depending on the context:
@@ -1270,14 +1285,13 @@ impl<'a> Parser<'a> {
1270
1285
//
1271
1286
// We first try to parse the word and following tokens as a special expression, and if that fails,
1272
1287
// we rollback and try to parse it as an identifier.
1273
- match self.try_parse(|parser| {
1274
- parser.parse_expr_prefix_by_reserved_word(&w, next_token.span)
1275
- }) {
1288
+ let w = w.clone();
1289
+ match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) {
1276
1290
// This word indicated an expression prefix and parsing was successful
1277
1291
Ok(Some(expr)) => Ok(expr),
1278
1292
1279
1293
// No expression prefix associated with this word
1280
- Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, next_token. span)?),
1294
+ Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, span)?),
1281
1295
1282
1296
// If parsing of the word as a special expression failed, we are facing two options:
1283
1297
// 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`)
@@ -1288,7 +1302,7 @@ impl<'a> Parser<'a> {
1288
1302
Err(e) => {
1289
1303
if !self.dialect.is_reserved_for_identifier(w.keyword) {
1290
1304
if let Ok(Some(expr)) = self.maybe_parse(|parser| {
1291
- parser.parse_expr_prefix_by_unreserved_word(&w, next_token. span)
1305
+ parser.parse_expr_prefix_by_unreserved_word(&w, span)
1292
1306
}) {
1293
1307
return Ok(expr);
1294
1308
}
@@ -1300,7 +1314,7 @@ impl<'a> Parser<'a> {
1300
1314
// array `[1, 2, 3]`
1301
1315
Token::LBracket => self.parse_array_expr(false),
1302
1316
tok @ Token::Minus | tok @ Token::Plus => {
1303
- let op = if tok == Token::Plus {
1317
+ let op = if * tok == Token::Plus {
1304
1318
UnaryOperator::Plus
1305
1319
} else {
1306
1320
UnaryOperator::Minus
@@ -1312,20 +1326,16 @@ impl<'a> Parser<'a> {
1312
1326
),
1313
1327
})
1314
1328
}
1315
- Token::ExclamationMark if self.dialect.supports_bang_not_operator() => {
1316
- Ok(Expr::UnaryOp {
1317
- op: UnaryOperator::BangNot,
1318
- expr: Box::new(
1319
- self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?,
1320
- ),
1321
- })
1322
- }
1329
+ Token::ExclamationMark if dialect.supports_bang_not_operator() => Ok(Expr::UnaryOp {
1330
+ op: UnaryOperator::BangNot,
1331
+ expr: Box::new(self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?),
1332
+ }),
1323
1333
tok @ Token::DoubleExclamationMark
1324
1334
| tok @ Token::PGSquareRoot
1325
1335
| tok @ Token::PGCubeRoot
1326
1336
| tok @ Token::AtSign
1327
1337
| tok @ Token::Tilde
1328
- if dialect_of!(self is PostgreSqlDialect) =>
1338
+ if dialect_is!(dialect is PostgreSqlDialect) =>
1329
1339
{
1330
1340
let op = match tok {
1331
1341
Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial,
@@ -1342,7 +1352,7 @@ impl<'a> Parser<'a> {
1342
1352
),
1343
1353
})
1344
1354
}
1345
- Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
1355
+ Token::EscapedStringLiteral(_) if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) =>
1346
1356
{
1347
1357
self.prev_token();
1348
1358
Ok(Expr::Value(self.parse_value()?))
@@ -1408,11 +1418,11 @@ impl<'a> Parser<'a> {
1408
1418
self.prev_token();
1409
1419
Ok(Expr::Value(self.parse_value()?))
1410
1420
}
1411
- Token::LBrace if self. dialect.supports_dictionary_syntax() => {
1421
+ Token::LBrace if dialect.supports_dictionary_syntax() => {
1412
1422
self.prev_token();
1413
1423
self.parse_duckdb_struct_literal()
1414
1424
}
1415
- _ => self.expected ("an expression", next_token ),
1425
+ _ => self.expected_current ("an expression"),
1416
1426
}?;
1417
1427
1418
1428
let expr = self.try_parse_method(expr)?;
@@ -3273,11 +3283,17 @@ impl<'a> Parser<'a> {
3273
3283
}
3274
3284
3275
3285
/// Return the first non-whitespace token that has not yet been processed
3276
- /// ( or None if reached end-of-file)
3286
+ /// or Token::EOF
3277
3287
pub fn peek_token(&self) -> TokenWithSpan {
3278
3288
self.peek_nth_token(0)
3279
3289
}
3280
3290
3291
+ /// Return a reference to the first non-whitespace token that has not yet
3292
+ /// been processed or Token::EOF
3293
+ pub fn peek_token_ref(&self) -> &TokenWithSpan {
3294
+ self.peek_nth_token_ref(0)
3295
+ }
3296
+
3281
3297
/// Returns the `N` next non-whitespace tokens that have not yet been
3282
3298
/// processed.
3283
3299
///
@@ -3329,7 +3345,12 @@ impl<'a> Parser<'a> {
3329
3345
}
3330
3346
3331
3347
/// Return nth non-whitespace token that has not yet been processed
3332
- pub fn peek_nth_token(&self, mut n: usize) -> TokenWithSpan {
3348
+ pub fn peek_nth_token(&self, n: usize) -> TokenWithSpan {
3349
+ self.peek_nth_token_ref(n).clone()
3350
+ }
3351
+
3352
+ /// Return nth non-whitespace token that has not yet been processed
3353
+ pub fn peek_nth_token_ref(&self, mut n: usize) -> &TokenWithSpan {
3333
3354
let mut index = self.index;
3334
3355
loop {
3335
3356
index += 1;
@@ -3340,10 +3361,7 @@ impl<'a> Parser<'a> {
3340
3361
}) => continue,
3341
3362
non_whitespace => {
3342
3363
if n == 0 {
3343
- return non_whitespace.cloned().unwrap_or(TokenWithSpan {
3344
- token: Token::EOF,
3345
- span: Span::empty(),
3346
- });
3364
+ return non_whitespace.unwrap_or(&EOF_TOKEN);
3347
3365
}
3348
3366
n -= 1;
3349
3367
}
@@ -3376,22 +3394,22 @@ impl<'a> Parser<'a> {
3376
3394
matched
3377
3395
}
3378
3396
3397
+ pub fn next_token(&mut self) -> TokenWithSpan {
3398
+ self.next_token_ref().clone()
3399
+ }
3400
+
3379
3401
/// Return the first non-whitespace token that has not yet been processed
3380
3402
/// (or None if reached end-of-file) and mark it as processed. OK to call
3381
3403
/// repeatedly after reaching EOF.
3382
- pub fn next_token (&mut self) -> TokenWithSpan {
3404
+ pub fn next_token_ref (&mut self) -> & TokenWithSpan {
3383
3405
loop {
3384
3406
self.index += 1;
3385
3407
match self.tokens.get(self.index - 1) {
3386
3408
Some(TokenWithSpan {
3387
3409
token: Token::Whitespace(_),
3388
3410
span: _,
3389
3411
}) => continue,
3390
- token => {
3391
- return token
3392
- .cloned()
3393
- .unwrap_or_else(|| TokenWithSpan::wrap(Token::EOF))
3394
- }
3412
+ token => return token.unwrap_or(&EOF_TOKEN),
3395
3413
}
3396
3414
}
3397
3415
}
@@ -3428,6 +3446,15 @@ impl<'a> Parser<'a> {
3428
3446
)
3429
3447
}
3430
3448
3449
+ /// Report that the current token was found instead of `expected`.
3450
+ pub fn expected_current<T>(&self, expected: &str) -> Result<T, ParserError> {
3451
+ let found = self.tokens.get(self.index).unwrap_or(&EOF_TOKEN);
3452
+ parser_err!(
3453
+ format!("Expected: {expected}, found: {found}"),
3454
+ found.span.start
3455
+ )
3456
+ }
3457
+
3431
3458
/// If the current token is the `expected` keyword, consume it and returns
3432
3459
/// true. Otherwise, no tokens are consumed and returns false.
3433
3460
#[must_use]
0 commit comments