Commit e4ebe44

Code review fixes

1 parent 02e9724

File tree

src/parser/mod.rs
tests/sqlparser_common.rs

2 files changed: +27, -227 lines

src/parser/mod.rs

Lines changed: 8 additions & 226 deletions
@@ -1134,7 +1134,7 @@ impl<'a> Parser<'a> {
     }

     // Tries to parse an expression by a word that is not known to have a special meaning in the dialect.
-    fn parse_expr_prefix_by_unnreserved_word(&mut self, w: &Word) -> Result<Expr, ParserError> {
+    fn parse_expr_prefix_by_unreserved_word(&mut self, w: &Word) -> Result<Expr, ParserError> {
         match self.peek_token().token {
             Token::LParen | Token::Period => {
                 let mut id_parts: Vec<Ident> = vec![w.to_ident()];
@@ -1259,25 +1259,23 @@ impl<'a> Parser<'a> {
                 //
                 // We first try to parse the word and following tokens as a special expression, and if that fails,
                 // we rollback and try to parse it as an identifier.
-                match self
-                    .maybe_parse_internal(|parser| parser.parse_expr_prefix_by_reserved_word(&w))
-                {
+                match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w)) {
                     // This word indicated an expression prefix and parsing was successful
                     Ok(Some(expr)) => Ok(expr),

                     // No expression prefix associated with this word
-                    Ok(None) => Ok(self.parse_expr_prefix_by_unnreserved_word(&w)?),
+                    Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w)?),

                     // If parsing of the word as a special expression failed, we are facing two options:
-                    // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI`
+                    // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`)
                     // 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl`
                     // We first try to parse the word as an identifier and if that fails
                     // we rollback and return the parsing error we got from trying to parse a
                     // special expression (to maintain backwards compatibility of parsing errors).
                     Err(e) => {
                         if !self.dialect.is_reserved_for_identifier(w.keyword) {
-                            if let Ok(expr) = self.maybe_parse_internal(|parser| {
-                                parser.parse_expr_prefix_by_unnreserved_word(&w)
+                            if let Ok(Some(expr)) = self.maybe_parse(|parser| {
+                                parser.parse_expr_prefix_by_unreserved_word(&w)
                             }) {
                                 return Ok(expr);
                             }
@@ -1416,222 +1414,6 @@ impl<'a> Parser<'a> {
         }
     }

-    /// Parse an expression prefix.
-    pub fn parse_prefix2(&mut self) -> Result<Expr, ParserError> {
-        // allow the dialect to override prefix parsing
-        if let Some(prefix) = self.dialect.parse_prefix(self) {
-            return prefix;
-        }
-
-        // PostgreSQL allows any string literal to be preceded by a type name, indicating that the
-        // string literal represents a literal of that type. Some examples:
-        //
-        // DATE '2020-05-20'
-        // TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54'
-        // BOOL 'true'
-        //
-        // The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating
-        // matters is the fact that INTERVAL string literals may optionally be followed by special
-        // keywords, e.g.:
-        //
-        // INTERVAL '7' DAY
-        //
-        // Note also that naively `SELECT date` looks like a syntax error because the `date` type
-        // name is not followed by a string literal, but in fact in PostgreSQL it is a valid
-        // expression that should parse as the column name "date".
-        let loc = self.peek_token().location;
-        let opt_expr = self.maybe_parse(|parser| {
-            match parser.parse_data_type()? {
-                DataType::Interval => parser.parse_interval(),
-                // PostgreSQL allows almost any identifier to be used as custom data type name,
-                // and we support that in `parse_data_type()`. But unlike Postgres we don't
-                // have a list of globally reserved keywords (since they vary across dialects),
-                // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type
-                // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
-                // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
-                // `type 'string'` syntax for the custom data types at all.
-                DataType::Custom(..) => parser_err!("dummy", loc),
-                data_type => Ok(Expr::TypedString {
-                    data_type,
-                    value: parser.parse_literal_string()?,
-                }),
-            }
-        })?;
-
-        if let Some(expr) = opt_expr {
-            return Ok(expr);
-        }
-
-        let next_token = self.next_token();
-        let expr = match next_token.token {
-            Token::Word(w) => {
-                // Save the parser index so we can rollback
-                let index_before = self.index;
-                // The word we consumed may fall into one of two cases: it's a reserved word in the dialect
-                // and has a special meaning, or not. For example, in Snowflake, the word `interval` may have
-                // two meanings depending on the context:
-                // `SELECT CURRENT_DATE() + INTERVAL '1 DAY', MAX(interval) FROM test;`
-                // In its first occurrence it's part of an interval expression and in the second it's an identifier.
-
-                // We first try to parse the word and following tokens as a special expression, and if that fails,
-                // we rollback and try to parse it as an identifier.
-                match self.parse_expr_prefix_by_reserved_word(&w) {
-                    // No expression prefix associated with this word
-                    Ok(None) => Ok(self.parse_expr_prefix_by_unnreserved_word(&w)?),
-                    // This word indicated an expression prefix and parsing was successful
-                    Ok(Some(expr)) => Ok(expr),
-                    // If parsing of the word as a special expression failed, we are facing two options:
-                    // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI`
-                    // 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl`
-
-                    // We first try to parse the word as an identifier and if that fails
-                    // we rollback to the original position in the token stream and return parsing error
-                    // we got from trying to parse a special expression (to maintain backwards
-                    // compatibility of parsing errors).
-                    Err(e) => {
-                        let index_after_error = self.index;
-                        if !self.dialect.is_reserved_for_identifier(w.keyword) {
-                            // Rollback before trying to parse using a different approach
-                            self.index = index_before;
-                            if let Ok(expr) = self.parse_expr_prefix_by_unnreserved_word(&w) {
-                                return Ok(expr);
-                            }
-                        }
-                        self.index = index_after_error;
-                        return Err(e);
-                    }
-                }
-            } // End of Token::Word
-            // array `[1, 2, 3]`
-            Token::LBracket => self.parse_array_expr(false),
-            tok @ Token::Minus | tok @ Token::Plus => {
-                let op = if tok == Token::Plus {
-                    UnaryOperator::Plus
-                } else {
-                    UnaryOperator::Minus
-                };
-                Ok(Expr::UnaryOp {
-                    op,
-                    expr: Box::new(
-                        self.parse_subexpr(self.dialect.prec_value(Precedence::MulDivModOp))?,
-                    ),
-                })
-            }
-            Token::ExclamationMark if self.dialect.supports_bang_not_operator() => {
-                Ok(Expr::UnaryOp {
-                    op: UnaryOperator::BangNot,
-                    expr: Box::new(
-                        self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?,
-                    ),
-                })
-            }
-            tok @ Token::DoubleExclamationMark
-            | tok @ Token::PGSquareRoot
-            | tok @ Token::PGCubeRoot
-            | tok @ Token::AtSign
-            | tok @ Token::Tilde
-                if dialect_of!(self is PostgreSqlDialect) =>
-            {
-                let op = match tok {
-                    Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial,
-                    Token::PGSquareRoot => UnaryOperator::PGSquareRoot,
-                    Token::PGCubeRoot => UnaryOperator::PGCubeRoot,
-                    Token::AtSign => UnaryOperator::PGAbs,
-                    Token::Tilde => UnaryOperator::PGBitwiseNot,
-                    _ => unreachable!(),
-                };
-                Ok(Expr::UnaryOp {
-                    op,
-                    expr: Box::new(
-                        self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?,
-                    ),
-                })
-            }
-            Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
-            {
-                self.prev_token();
-                Ok(Expr::Value(self.parse_value()?))
-            }
-            Token::UnicodeStringLiteral(_) => {
-                self.prev_token();
-                Ok(Expr::Value(self.parse_value()?))
-            }
-            Token::Number(_, _)
-            | Token::SingleQuotedString(_)
-            | Token::DoubleQuotedString(_)
-            | Token::TripleSingleQuotedString(_)
-            | Token::TripleDoubleQuotedString(_)
-            | Token::DollarQuotedString(_)
-            | Token::SingleQuotedByteStringLiteral(_)
-            | Token::DoubleQuotedByteStringLiteral(_)
-            | Token::TripleSingleQuotedByteStringLiteral(_)
-            | Token::TripleDoubleQuotedByteStringLiteral(_)
-            | Token::SingleQuotedRawStringLiteral(_)
-            | Token::DoubleQuotedRawStringLiteral(_)
-            | Token::TripleSingleQuotedRawStringLiteral(_)
-            | Token::TripleDoubleQuotedRawStringLiteral(_)
-            | Token::NationalStringLiteral(_)
-            | Token::HexStringLiteral(_) => {
-                self.prev_token();
-                Ok(Expr::Value(self.parse_value()?))
-            }
-            Token::LParen => {
-                let expr = if let Some(expr) = self.try_parse_expr_sub_query()? {
-                    expr
-                } else if let Some(lambda) = self.try_parse_lambda()? {
-                    return Ok(lambda);
-                } else {
-                    let exprs = self.parse_comma_separated(Parser::parse_expr)?;
-                    match exprs.len() {
-                        0 => unreachable!(), // parse_comma_separated ensures 1 or more
-                        1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())),
-                        _ => Expr::Tuple(exprs),
-                    }
-                };
-                self.expect_token(&Token::RParen)?;
-                let expr = self.try_parse_method(expr)?;
-                if !self.consume_token(&Token::Period) {
-                    Ok(expr)
-                } else {
-                    let tok = self.next_token();
-                    let key = match tok.token {
-                        Token::Word(word) => word.to_ident(),
-                        _ => {
-                            return parser_err!(
-                                format!("Expected identifier, found: {tok}"),
-                                tok.location
-                            )
-                        }
-                    };
-                    Ok(Expr::CompositeAccess {
-                        expr: Box::new(expr),
-                        key,
-                    })
-                }
-            }
-            Token::Placeholder(_) | Token::Colon | Token::AtSign => {
-                self.prev_token();
-                Ok(Expr::Value(self.parse_value()?))
-            }
-            Token::LBrace if self.dialect.supports_dictionary_syntax() => {
-                self.prev_token();
-                self.parse_duckdb_struct_literal()
-            }
-            _ => self.expected("an expression", next_token),
-        }?;
-
-        let expr = self.try_parse_method(expr)?;
-
-        if self.parse_keyword(Keyword::COLLATE) {
-            Ok(Expr::Collate {
-                expr: Box::new(expr),
-                collation: self.parse_object_name(false)?,
-            })
-        } else {
-            Ok(expr)
-        }
-    }
-
     pub fn parse_utility_options(&mut self) -> Result<Vec<UtilityOption>, ParserError> {
         self.expect_token(&Token::LParen)?;
         let options = self.parse_comma_separated(Self::parse_utility_option)?;
@@ -3921,15 +3703,15 @@ impl<'a> Parser<'a> {
     where
         F: FnMut(&mut Parser) -> Result<T, ParserError>,
     {
-        match self.maybe_parse_internal(f) {
+        match self.try_parse(f) {
             Ok(t) => Ok(Some(t)),
             Err(ParserError::RecursionLimitExceeded) => Err(ParserError::RecursionLimitExceeded),
             _ => Ok(None),
         }
     }

     /// Run a parser method `f`, reverting back to the current position if unsuccessful.
-    pub fn maybe_parse_internal<T, F>(&mut self, mut f: F) -> Result<T, ParserError>
+    pub fn try_parse<T, F>(&mut self, mut f: F) -> Result<T, ParserError>
     where
         F: FnMut(&mut Parser) -> Result<T, ParserError>,
     {
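
For context on the rename above: `maybe_parse` stays the public wrapper that swallows ordinary parse failures, while the renamed `try_parse` (formerly `maybe_parse_internal`) surfaces the error after rewinding. The following standalone sketch mirrors that relationship; the toy `Parser` and `ParserError` types are illustrative only and are not the crate's real definitions.

#[derive(Debug, PartialEq)]
enum ParserError {
    ParserError(String),
    RecursionLimitExceeded,
}

struct Parser {
    index: usize,
}

impl Parser {
    /// Run `f`, reverting back to the current position if unsuccessful.
    fn try_parse<T, F>(&mut self, mut f: F) -> Result<T, ParserError>
    where
        F: FnMut(&mut Parser) -> Result<T, ParserError>,
    {
        let start = self.index;
        let result = f(self);
        if result.is_err() {
            // Roll back so the caller can retry with a different strategy.
            self.index = start;
        }
        result
    }

    /// Like `try_parse`, but report an ordinary failure as `Ok(None)`.
    fn maybe_parse<T, F>(&mut self, f: F) -> Result<Option<T>, ParserError>
    where
        F: FnMut(&mut Parser) -> Result<T, ParserError>,
    {
        match self.try_parse(f) {
            Ok(t) => Ok(Some(t)),
            Err(ParserError::RecursionLimitExceeded) => Err(ParserError::RecursionLimitExceeded),
            _ => Ok(None),
        }
    }
}

fn main() {
    let mut p = Parser { index: 0 };
    // The closure fails, so the index is restored and the failure is reported
    // as "no match" rather than as a hard error.
    let parsed = p.maybe_parse(|parser| -> Result<(), ParserError> {
        parser.index += 1; // pretend we consumed a token
        Err(ParserError::ParserError("no special prefix here".into()))
    });
    assert_eq!(parsed, Ok(None));
    assert_eq!(p.index, 0);
}

Keeping the rollback in `try_parse` lets call sites either inspect the original error (as the `Err(e)` arm in the hunk at line 1262 does) or treat failure as "no match" via `maybe_parse`.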

tests/sqlparser_common.rs

Lines changed: 19 additions & 1 deletion
@@ -34,7 +34,7 @@ use sqlparser::dialect::{
     GenericDialect, HiveDialect, MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect,
     SQLiteDialect, SnowflakeDialect,
 };
-use sqlparser::keywords::ALL_KEYWORDS;
+use sqlparser::keywords::{Keyword, ALL_KEYWORDS};
 use sqlparser::parser::{Parser, ParserError, ParserOptions};
 use sqlparser::tokenizer::Tokenizer;
 use test_utils::{
@@ -12160,3 +12160,21 @@ fn parse_create_table_select() {
         );
     }
 }
+
+#[test]
+fn test_reserved_keywords_for_identifiers() {
+    let dialects = all_dialects_where(|d| d.is_reserved_for_identifier(Keyword::INTERVAL));
+    // Dialects that reserve the word INTERVAL will not allow it as an unquoted identifier
+    let sql = "SELECT MAX(interval) FROM tbl";
+    assert_eq!(
+        dialects.parse_sql_statements(sql),
+        Err(ParserError::ParserError(
+            "Expected: an expression, found: )".to_string()
+        ))
+    );
+
+    // Dialects that do not reserve the word INTERVAL will allow it
+    let dialects = all_dialects_where(|d| !d.is_reserved_for_identifier(Keyword::INTERVAL));
+    let sql = "SELECT MAX(interval) FROM tbl";
+    dialects.parse_sql_statements(sql).unwrap();
+}
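
The new test exercises the `Dialect::is_reserved_for_identifier` hook that the parser change consults. Below is a minimal sketch of a hypothetical dialect (`ReservesInterval`) that reserves INTERVAL, assuming the hook takes a `Keyword` by value and returns `bool`, as the calls in this diff suggest; the exact trait signature and its default implementation live in the dialect module and may differ.

use sqlparser::dialect::Dialect;
use sqlparser::keywords::Keyword;
use sqlparser::parser::Parser;

// Hypothetical dialect that treats INTERVAL as reserved for identifiers.
#[derive(Debug)]
struct ReservesInterval;

impl Dialect for ReservesInterval {
    fn is_identifier_start(&self, ch: char) -> bool {
        ch.is_ascii_alphabetic() || ch == '_'
    }
    fn is_identifier_part(&self, ch: char) -> bool {
        ch.is_ascii_alphanumeric() || ch == '_'
    }
    fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {
        kw == Keyword::INTERVAL
    }
}

fn main() {
    // Mirrors the first assertion in the test above: an unquoted `interval`
    // identifier should be rejected when the dialect reserves the keyword.
    let sql = "SELECT MAX(interval) FROM tbl";
    assert!(Parser::parse_sql(&ReservesInterval, sql).is_err());
}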
