@@ -1009,6 +1009,8 @@ impl<'a> Parser<'a> {
         Ok(Statement::NOTIFY { channel, payload })
     }
 
+    // Tries to parse an expression by matching the specified word to known keywords that have a special meaning in the dialect.
+    // Returns `None` if no match is found.
     fn parse_expr_prefix_by_reserved_word(
         &mut self,
         w: &Word,
@@ -1115,7 +1117,8 @@ impl<'a> Parser<'a> {
         }
     }
 
-    fn parse_expr_prefix_by_nonreserved_word(&mut self, w: &Word) -> Result<Expr, ParserError> {
+    // Tries to parse an expression by a word that is not known to have a special meaning in the dialect.
+    fn parse_expr_prefix_by_unreserved_word(&mut self, w: &Word) -> Result<Expr, ParserError> {
         match self.peek_token().token {
             Token::LParen | Token::Period => {
                 let mut id_parts: Vec<Ident> = vec![w.to_ident()];
@@ -1229,27 +1232,252 @@ impl<'a> Parser<'a> {
             return Ok(expr);
         }
 
+        let next_token = self.next_token();
+        let expr = match next_token.token {
+            Token::Word(w) => {
+                // The word we consumed may fall into one of two cases: it has a special meaning, or not.
+                // For example, in Snowflake, the word `interval` may have two meanings depending on the context:
+                // `SELECT CURRENT_DATE() + INTERVAL '1 DAY', MAX(interval) FROM tbl;`
+                //                          ^^^^^^^^^^^^^^^^       ^^^^^^^^
+                //                          interval expression    identifier
+                //
+                // We first try to parse the word and following tokens as a special expression, and if that fails,
+                // we rollback and try to parse it as an identifier.
+                match self
+                    .maybe_parse_internal(|parser| parser.parse_expr_prefix_by_reserved_word(&w))
+                {
+                    // This word indicated an expression prefix and parsing was successful
+                    Ok(Some(expr)) => Ok(expr),
+
+                    // No expression prefix associated with this word
+                    Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w)?),
+
+                    // If parsing of the word as a special expression failed, we are facing two options:
+                    // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI`
+                    // 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl`
+                    // We first try to parse the word as an identifier and if that fails
+                    // we rollback and return the parsing error we got from trying to parse a
+                    // special expression (to maintain backwards compatibility of parsing errors).
+                    Err(e) => {
+                        if !self.dialect.is_reserved_for_identifier(w.keyword) {
+                            if let Ok(expr) = self.maybe_parse_internal(|parser| {
+                                parser.parse_expr_prefix_by_unreserved_word(&w)
+                            }) {
+                                return Ok(expr);
+                            }
+                        }
+                        return Err(e);
+                    }
+                }
+            } // End of Token::Word
+            // array `[1, 2, 3]`
+            Token::LBracket => self.parse_array_expr(false),
+            tok @ Token::Minus | tok @ Token::Plus => {
+                let op = if tok == Token::Plus {
+                    UnaryOperator::Plus
+                } else {
+                    UnaryOperator::Minus
+                };
+                Ok(Expr::UnaryOp {
+                    op,
+                    expr: Box::new(
+                        self.parse_subexpr(self.dialect.prec_value(Precedence::MulDivModOp))?,
+                    ),
+                })
+            }
+            Token::ExclamationMark if self.dialect.supports_bang_not_operator() => {
+                Ok(Expr::UnaryOp {
+                    op: UnaryOperator::BangNot,
+                    expr: Box::new(
+                        self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?,
+                    ),
+                })
+            }
+            tok @ Token::DoubleExclamationMark
+            | tok @ Token::PGSquareRoot
+            | tok @ Token::PGCubeRoot
+            | tok @ Token::AtSign
+            | tok @ Token::Tilde
+                if dialect_of!(self is PostgreSqlDialect) =>
+            {
+                let op = match tok {
+                    Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial,
+                    Token::PGSquareRoot => UnaryOperator::PGSquareRoot,
+                    Token::PGCubeRoot => UnaryOperator::PGCubeRoot,
+                    Token::AtSign => UnaryOperator::PGAbs,
+                    Token::Tilde => UnaryOperator::PGBitwiseNot,
+                    _ => unreachable!(),
+                };
+                Ok(Expr::UnaryOp {
+                    op,
+                    expr: Box::new(
+                        self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?,
+                    ),
+                })
+            }
+            Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
+            {
+                self.prev_token();
+                Ok(Expr::Value(self.parse_value()?))
+            }
+            Token::UnicodeStringLiteral(_) => {
+                self.prev_token();
+                Ok(Expr::Value(self.parse_value()?))
+            }
+            Token::Number(_, _)
+            | Token::SingleQuotedString(_)
+            | Token::DoubleQuotedString(_)
+            | Token::TripleSingleQuotedString(_)
+            | Token::TripleDoubleQuotedString(_)
+            | Token::DollarQuotedString(_)
+            | Token::SingleQuotedByteStringLiteral(_)
+            | Token::DoubleQuotedByteStringLiteral(_)
+            | Token::TripleSingleQuotedByteStringLiteral(_)
+            | Token::TripleDoubleQuotedByteStringLiteral(_)
+            | Token::SingleQuotedRawStringLiteral(_)
+            | Token::DoubleQuotedRawStringLiteral(_)
+            | Token::TripleSingleQuotedRawStringLiteral(_)
+            | Token::TripleDoubleQuotedRawStringLiteral(_)
+            | Token::NationalStringLiteral(_)
+            | Token::HexStringLiteral(_) => {
+                self.prev_token();
+                Ok(Expr::Value(self.parse_value()?))
+            }
+            Token::LParen => {
+                let expr = if let Some(expr) = self.try_parse_expr_sub_query()? {
+                    expr
+                } else if let Some(lambda) = self.try_parse_lambda()? {
+                    return Ok(lambda);
+                } else {
+                    let exprs = self.parse_comma_separated(Parser::parse_expr)?;
+                    match exprs.len() {
+                        0 => unreachable!(), // parse_comma_separated ensures 1 or more
+                        1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())),
+                        _ => Expr::Tuple(exprs),
+                    }
+                };
+                self.expect_token(&Token::RParen)?;
+                let expr = self.try_parse_method(expr)?;
+                if !self.consume_token(&Token::Period) {
+                    Ok(expr)
+                } else {
+                    let tok = self.next_token();
+                    let key = match tok.token {
+                        Token::Word(word) => word.to_ident(),
+                        _ => {
+                            return parser_err!(
+                                format!("Expected identifier, found: {tok}"),
+                                tok.location
+                            )
+                        }
+                    };
+                    Ok(Expr::CompositeAccess {
+                        expr: Box::new(expr),
+                        key,
+                    })
+                }
+            }
+            Token::Placeholder(_) | Token::Colon | Token::AtSign => {
+                self.prev_token();
+                Ok(Expr::Value(self.parse_value()?))
+            }
+            Token::LBrace if self.dialect.supports_dictionary_syntax() => {
+                self.prev_token();
+                self.parse_duckdb_struct_literal()
+            }
+            _ => self.expected("an expression", next_token),
+        }?;
+
+        let expr = self.try_parse_method(expr)?;
+
+        if self.parse_keyword(Keyword::COLLATE) {
+            Ok(Expr::Collate {
+                expr: Box::new(expr),
+                collation: self.parse_object_name(false)?,
+            })
+        } else {
+            Ok(expr)
+        }
+    }
+
+    /// Parse an expression prefix.
+    pub fn parse_prefix2(&mut self) -> Result<Expr, ParserError> {
+        // allow the dialect to override prefix parsing
+        if let Some(prefix) = self.dialect.parse_prefix(self) {
+            return prefix;
+        }
+
+        // PostgreSQL allows any string literal to be preceded by a type name, indicating that the
+        // string literal represents a literal of that type. Some examples:
+        //
+        //      DATE '2020-05-20'
+        //      TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54'
+        //      BOOL 'true'
+        //
+        // The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating
+        // matters is the fact that INTERVAL string literals may optionally be followed by special
+        // keywords, e.g.:
+        //
+        //      INTERVAL '7' DAY
+        //
+        // Note also that naively `SELECT date` looks like a syntax error because the `date` type
+        // name is not followed by a string literal, but in fact in PostgreSQL it is a valid
+        // expression that should parse as the column name "date".
+        let loc = self.peek_token().location;
+        let opt_expr = self.maybe_parse(|parser| {
+            match parser.parse_data_type()? {
+                DataType::Interval => parser.parse_interval(),
+                // PostgreSQL allows almost any identifier to be used as custom data type name,
+                // and we support that in `parse_data_type()`. But unlike Postgres we don't
+                // have a list of globally reserved keywords (since they vary across dialects),
+                // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type
+                // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
+                // a unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
+                // `type 'string'` syntax for the custom data types at all.
+                DataType::Custom(..) => parser_err!("dummy", loc),
+                data_type => Ok(Expr::TypedString {
+                    data_type,
+                    value: parser.parse_literal_string()?,
+                }),
+            }
+        })?;
+
+        if let Some(expr) = opt_expr {
+            return Ok(expr);
+        }
+
         let next_token = self.next_token();
         let expr = match next_token.token {
             Token::Word(w) => {
                 // Save the parser index so we can rollback
                 let index_before = self.index;
-                // We first try to parse the word as the prefix of an expression.
-                // For example, the word INTERVAL in: SELECT INTERVAL '7' DAY
+                // The word we consumed may fall into one of two cases: it's a reserved word in the dialect
+                // and has a special meaning, or not. For example, in Snowflake, the word `interval` may have
+                // two meanings depending on the context:
+                // `SELECT CURRENT_DATE() + INTERVAL '1 DAY', MAX(interval) FROM test;`
+                // In its first occurrence it's part of an interval expression and in the second it's an identifier.
+
+                // We first try to parse the word and following tokens as a special expression, and if that fails,
+                // we rollback and try to parse it as an identifier.
                 match self.parse_expr_prefix_by_reserved_word(&w) {
                     // No expression prefix associated with this word
-                    Ok(None) => Ok(self.parse_expr_prefix_by_nonreserved_word(&w)?),
+                    Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w)?),
                     // This word indicated an expression prefix and parsing was successful
                     Ok(Some(expr)) => Ok(expr),
-                    // This word indicated an expression prefix but parsing failed. Two options:
-                    // 1. Malformed statement
-                    // 2. The dialect may allow this word as identifier as well as indicating an expression
+                    // If parsing of the word as a special expression failed, we are facing two options:
+                    // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI`
+                    // 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl`
+
+                    // We first try to parse the word as an identifier and if that fails
+                    // we rollback to the original position in the token stream and return the parsing error
+                    // we got from trying to parse a special expression (to maintain backwards
+                    // compatibility of parsing errors).
                     Err(e) => {
                         let index_after_error = self.index;
                         if !self.dialect.is_reserved_for_identifier(w.keyword) {
                             // Rollback before trying to parse using a different approach
                             self.index = index_before;
-                            if let Ok(expr) = self.parse_expr_prefix_by_nonreserved_word(&w) {
+                            if let Ok(expr) = self.parse_expr_prefix_by_unreserved_word(&w) {
                                 return Ok(expr);
                             }
                         }
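
For context on the behaviour the two comment blocks above describe (reserved-word prefixes, identifier fallback, and typed string literals), here is a minimal usage sketch. It is not part of the diff and assumes the crate's public `Parser::parse_sql` entry point and the `SnowflakeDialect` re-export; it mirrors the example SQL quoted in the comments.

```rust
// Minimal sketch (not part of this PR), assuming the public sqlparser API.
use sqlparser::dialect::SnowflakeDialect;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    let dialect = SnowflakeDialect {};

    // `INTERVAL '1 DAY'` goes through the reserved-word prefix path, while
    // `MAX(interval)` falls back to treating `interval` as a plain identifier.
    let mixed = "SELECT CURRENT_DATE() + INTERVAL '1 DAY', MAX(interval) FROM tbl";
    println!("{:?}", Parser::parse_sql(&dialect, mixed)?);

    // Typed string literals such as `DATE '2020-05-20'` are handled by the
    // `parse_data_type()` probe before the main token match.
    println!("{:?}", Parser::parse_sql(&dialect, "SELECT DATE '2020-05-20'")?);

    // A malformed special expression still reports the original parse error.
    assert!(Parser::parse_sql(&dialect, "SELECT INTERVAL '1").is_err());
    Ok(())
}
```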
@@ -3672,18 +3900,30 @@ impl<'a> Parser<'a> {
     }
 
     /// Run a parser method `f`, reverting back to the current position if unsuccessful.
-    pub fn maybe_parse<T, F>(&mut self, mut f: F) -> Result<Option<T>, ParserError>
+    /// Returns `None` if `f` returns an error.
+    pub fn maybe_parse<T, F>(&mut self, f: F) -> Result<Option<T>, ParserError>
     where
         F: FnMut(&mut Parser) -> Result<T, ParserError>,
     {
-        let index = self.index;
-        match f(self) {
+        match self.maybe_parse_internal(f) {
             Ok(t) => Ok(Some(t)),
-            // Unwind stack if limit exceeded
             Err(ParserError::RecursionLimitExceeded) => Err(ParserError::RecursionLimitExceeded),
-            Err(_) => {
+            _ => Ok(None),
+        }
+    }
+
+    /// Run a parser method `f`, reverting back to the current position if unsuccessful.
+    pub fn maybe_parse_internal<T, F>(&mut self, mut f: F) -> Result<T, ParserError>
+    where
+        F: FnMut(&mut Parser) -> Result<T, ParserError>,
+    {
+        let index = self.index;
+        match f(self) {
+            Ok(t) => Ok(t),
+            Err(e) => {
+                // Unwind stack if limit exceeded
                 self.index = index;
-                Ok(None)
+                Err(e)
             }
         }
     }