@@ -1025,6 +1025,8 @@ impl<'a> Parser<'a> {
        Ok(Statement::NOTIFY { channel, payload })
    }

+    // Tries to parse an expression by matching the specified word to known keywords that have a special meaning in the dialect.
+    // Returns `None` if no match is found.
    fn parse_expr_prefix_by_reserved_word(
        &mut self,
        w: &Word,
@@ -1131,7 +1133,8 @@ impl<'a> Parser<'a> {
        }
    }

-    fn parse_expr_prefix_by_nonreserved_word(&mut self, w: &Word) -> Result<Expr, ParserError> {
+    // Tries to parse an expression by a word that is not known to have a special meaning in the dialect.
+    fn parse_expr_prefix_by_unreserved_word(&mut self, w: &Word) -> Result<Expr, ParserError> {
        match self.peek_token().token {
            Token::LParen | Token::Period => {
                let mut id_parts: Vec<Ident> = vec![w.to_ident()];
@@ -1245,27 +1248,252 @@ impl<'a> Parser<'a> {
            return Ok(expr);
        }

+        let next_token = self.next_token();
+        let expr = match next_token.token {
+            Token::Word(w) => {
+                // The word we consumed may fall into one of two cases: it has a special meaning, or not.
+                // For example, in Snowflake, the word `interval` may have two meanings depending on the context:
+                // `SELECT CURRENT_DATE() + INTERVAL '1 DAY', MAX(interval) FROM tbl;`
+                //                          ^^^^^^^^^^^^^^^^          ^^^^^^^^
+                //                          interval expression       identifier
+                //
+                // We first try to parse the word and following tokens as a special expression, and if that fails,
+                // we roll back and try to parse it as an identifier.
+                match self
+                    .maybe_parse_internal(|parser| parser.parse_expr_prefix_by_reserved_word(&w))
+                {
+                    // This word indicated an expression prefix and parsing was successful
+                    Ok(Some(expr)) => Ok(expr),
+
+                    // No expression prefix associated with this word
+                    Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w)?),
+
+                    // If parsing of the word as a special expression failed, we are facing two options:
+                    // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI`
+                    // 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl`
+                    // We first try to parse the word as an identifier and if that fails
+                    // we roll back and return the parsing error we got from trying to parse a
+                    // special expression (to maintain backwards compatibility of parsing errors).
+                    Err(e) => {
+                        if !self.dialect.is_reserved_for_identifier(w.keyword) {
+                            if let Ok(expr) = self.maybe_parse_internal(|parser| {
+                                parser.parse_expr_prefix_by_unreserved_word(&w)
+                            }) {
+                                return Ok(expr);
+                            }
+                        }
+                        return Err(e);
+                    }
+                }
+            } // End of Token::Word
+            // array `[1, 2, 3]`
+            Token::LBracket => self.parse_array_expr(false),
+            tok @ Token::Minus | tok @ Token::Plus => {
+                let op = if tok == Token::Plus {
+                    UnaryOperator::Plus
+                } else {
+                    UnaryOperator::Minus
+                };
+                Ok(Expr::UnaryOp {
+                    op,
+                    expr: Box::new(
+                        self.parse_subexpr(self.dialect.prec_value(Precedence::MulDivModOp))?,
+                    ),
+                })
+            }
+            Token::ExclamationMark if self.dialect.supports_bang_not_operator() => {
+                Ok(Expr::UnaryOp {
+                    op: UnaryOperator::BangNot,
+                    expr: Box::new(
+                        self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?,
+                    ),
+                })
+            }
+            tok @ Token::DoubleExclamationMark
+            | tok @ Token::PGSquareRoot
+            | tok @ Token::PGCubeRoot
+            | tok @ Token::AtSign
+            | tok @ Token::Tilde
+                if dialect_of!(self is PostgreSqlDialect) =>
+            {
+                let op = match tok {
+                    Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial,
+                    Token::PGSquareRoot => UnaryOperator::PGSquareRoot,
+                    Token::PGCubeRoot => UnaryOperator::PGCubeRoot,
+                    Token::AtSign => UnaryOperator::PGAbs,
+                    Token::Tilde => UnaryOperator::PGBitwiseNot,
+                    _ => unreachable!(),
+                };
+                Ok(Expr::UnaryOp {
+                    op,
+                    expr: Box::new(
+                        self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?,
+                    ),
+                })
+            }
+            Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
+            {
+                self.prev_token();
+                Ok(Expr::Value(self.parse_value()?))
+            }
+            Token::UnicodeStringLiteral(_) => {
+                self.prev_token();
+                Ok(Expr::Value(self.parse_value()?))
+            }
+            Token::Number(_, _)
+            | Token::SingleQuotedString(_)
+            | Token::DoubleQuotedString(_)
+            | Token::TripleSingleQuotedString(_)
+            | Token::TripleDoubleQuotedString(_)
+            | Token::DollarQuotedString(_)
+            | Token::SingleQuotedByteStringLiteral(_)
+            | Token::DoubleQuotedByteStringLiteral(_)
+            | Token::TripleSingleQuotedByteStringLiteral(_)
+            | Token::TripleDoubleQuotedByteStringLiteral(_)
+            | Token::SingleQuotedRawStringLiteral(_)
+            | Token::DoubleQuotedRawStringLiteral(_)
+            | Token::TripleSingleQuotedRawStringLiteral(_)
+            | Token::TripleDoubleQuotedRawStringLiteral(_)
+            | Token::NationalStringLiteral(_)
+            | Token::HexStringLiteral(_) => {
+                self.prev_token();
+                Ok(Expr::Value(self.parse_value()?))
+            }
+            Token::LParen => {
+                let expr = if let Some(expr) = self.try_parse_expr_sub_query()? {
+                    expr
+                } else if let Some(lambda) = self.try_parse_lambda()? {
+                    return Ok(lambda);
+                } else {
+                    let exprs = self.parse_comma_separated(Parser::parse_expr)?;
+                    match exprs.len() {
+                        0 => unreachable!(), // parse_comma_separated ensures 1 or more
+                        1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())),
+                        _ => Expr::Tuple(exprs),
+                    }
+                };
+                self.expect_token(&Token::RParen)?;
+                let expr = self.try_parse_method(expr)?;
+                if !self.consume_token(&Token::Period) {
+                    Ok(expr)
+                } else {
+                    let tok = self.next_token();
+                    let key = match tok.token {
+                        Token::Word(word) => word.to_ident(),
+                        _ => {
+                            return parser_err!(
+                                format!("Expected identifier, found: {tok}"),
+                                tok.location
+                            )
+                        }
+                    };
+                    Ok(Expr::CompositeAccess {
+                        expr: Box::new(expr),
+                        key,
+                    })
+                }
+            }
+            Token::Placeholder(_) | Token::Colon | Token::AtSign => {
+                self.prev_token();
+                Ok(Expr::Value(self.parse_value()?))
+            }
+            Token::LBrace if self.dialect.supports_dictionary_syntax() => {
+                self.prev_token();
+                self.parse_duckdb_struct_literal()
+            }
+            _ => self.expected("an expression", next_token),
+        }?;
+
+        let expr = self.try_parse_method(expr)?;
+
+        if self.parse_keyword(Keyword::COLLATE) {
+            Ok(Expr::Collate {
+                expr: Box::new(expr),
+                collation: self.parse_object_name(false)?,
+            })
+        } else {
+            Ok(expr)
+        }
+    }
+
+    /// Parse an expression prefix.
+    pub fn parse_prefix2(&mut self) -> Result<Expr, ParserError> {
+        // allow the dialect to override prefix parsing
+        if let Some(prefix) = self.dialect.parse_prefix(self) {
+            return prefix;
+        }
+
+        // PostgreSQL allows any string literal to be preceded by a type name, indicating that the
+        // string literal represents a literal of that type. Some examples:
+        //
+        //     DATE '2020-05-20'
+        //     TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54'
+        //     BOOL 'true'
+        //
+        // The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating
+        // matters is the fact that INTERVAL string literals may optionally be followed by special
+        // keywords, e.g.:
+        //
+        //     INTERVAL '7' DAY
+        //
+        // Note also that naively `SELECT date` looks like a syntax error because the `date` type
+        // name is not followed by a string literal, but in fact in PostgreSQL it is a valid
+        // expression that should parse as the column name "date".
+        let loc = self.peek_token().location;
+        let opt_expr = self.maybe_parse(|parser| {
+            match parser.parse_data_type()? {
+                DataType::Interval => parser.parse_interval(),
+                // PostgreSQL allows almost any identifier to be used as custom data type name,
+                // and we support that in `parse_data_type()`. But unlike Postgres we don't
+                // have a list of globally reserved keywords (since they vary across dialects),
+                // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type
+                // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
+                // a unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
+                // `type 'string'` syntax for the custom data types at all.
+                DataType::Custom(..) => parser_err!("dummy", loc),
+                data_type => Ok(Expr::TypedString {
+                    data_type,
+                    value: parser.parse_literal_string()?,
+                }),
+            }
+        })?;
+
+        if let Some(expr) = opt_expr {
+            return Ok(expr);
+        }
+
        let next_token = self.next_token();
        let expr = match next_token.token {
            Token::Word(w) => {
                // Save the parser index so we can rollback
                let index_before = self.index;
-                // We first try to parse the word as the prefix of an expression.
-                // For example, the word INTERVAL in: SELECT INTERVAL '7' DAY
+                // The word we consumed may fall into one of two cases: it's a reserved word in the dialect
+                // and has a special meaning, or not. For example, in Snowflake, the word `interval` may have
+                // two meanings depending on the context:
+                // `SELECT CURRENT_DATE() + INTERVAL '1 DAY', MAX(interval) FROM test;`
+                // In its first occurrence it's part of an interval expression and in the second it's an identifier.
+
+                // We first try to parse the word and following tokens as a special expression, and if that fails,
+                // we roll back and try to parse it as an identifier.
                match self.parse_expr_prefix_by_reserved_word(&w) {
                    // No expression prefix associated with this word
-                    Ok(None) => Ok(self.parse_expr_prefix_by_nonreserved_word(&w)?),
+                    Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w)?),
                    // This word indicated an expression prefix and parsing was successful
                    Ok(Some(expr)) => Ok(expr),
-                    // This word indicated an expression prefix but parsing failed. Two options:
-                    // 1. Malformed statement
-                    // 2. The dialect may allow this word as identifier as well as indicating an expression
+                    // If parsing of the word as a special expression failed, we are facing two options:
+                    // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI`
+                    // 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl`
+
+                    // We first try to parse the word as an identifier and if that fails
+                    // we roll back to the original position in the token stream and return the parsing error
+                    // we got from trying to parse a special expression (to maintain backwards
+                    // compatibility of parsing errors).
                    Err(e) => {
                        let index_after_error = self.index;
                        if !self.dialect.is_reserved_for_identifier(w.keyword) {
                            // Rollback before trying to parse using a different approach
                            self.index = index_before;
-                            if let Ok(expr) = self.parse_expr_prefix_by_nonreserved_word(&w) {
+                            if let Ok(expr) = self.parse_expr_prefix_by_unreserved_word(&w) {
                                return Ok(expr);
                            }
                        }
@@ -3688,18 +3916,30 @@ impl<'a> Parser<'a> {
    }

    /// Run a parser method `f`, reverting back to the current position if unsuccessful.
-    pub fn maybe_parse<T, F>(&mut self, mut f: F) -> Result<Option<T>, ParserError>
+    /// Returns `None` if `f` returns an error
+    pub fn maybe_parse<T, F>(&mut self, f: F) -> Result<Option<T>, ParserError>
    where
        F: FnMut(&mut Parser) -> Result<T, ParserError>,
    {
-        let index = self.index;
-        match f(self) {
+        match self.maybe_parse_internal(f) {
            Ok(t) => Ok(Some(t)),
-            // Unwind stack if limit exceeded
            Err(ParserError::RecursionLimitExceeded) => Err(ParserError::RecursionLimitExceeded),
-            Err(_) => {
+            _ => Ok(None),
+        }
+    }
+
+    /// Run a parser method `f`, reverting back to the current position if unsuccessful.
+    pub fn maybe_parse_internal<T, F>(&mut self, mut f: F) -> Result<T, ParserError>
+    where
+        F: FnMut(&mut Parser) -> Result<T, ParserError>,
+    {
+        let index = self.index;
+        match f(self) {
+            Ok(t) => Ok(t),
+            Err(e) => {
+                // Unwind stack if limit exceeded
                self.index = index;
-                Ok(None)
+                Err(e)
            }
        }
    }
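
In this last hunk, `maybe_parse` keeps its old contract (rewind and return `Ok(None)` on an ordinary parse error, propagate `RecursionLimitExceeded`), while the new `maybe_parse_internal` rewinds but preserves the error for callers like the prefix-parsing code above that want to re-raise it. A rough sketch (again not part of the patch) of the observable `maybe_parse` behavior, assuming only public parser APIs (`Parser::new`, `try_with_sql`, `parse_data_type`, `parse_expr`) and the generic dialect:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    let dialect = GenericDialect {};
    let mut parser = Parser::new(&dialect).try_with_sql("123 + 1")?;

    // A number is not a data type name, so the closure fails; maybe_parse maps
    // the error to Ok(None) and rewinds to the saved token position.
    assert!(parser.maybe_parse(|p| p.parse_data_type())?.is_none());

    // Because the position was restored, the same tokens still parse as an
    // ordinary expression afterwards.
    let expr = parser.parse_expr()?;
    println!("{expr}");
    Ok(())
}
```
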