From 03df9d494d804f479dee8003e92aaccc8b147ca9 Mon Sep 17 00:00:00 2001 From: wugeer <1284057728@qq.com> Date: Sun, 27 Oct 2024 19:06:56 +0800 Subject: [PATCH 1/5] feat: add support for special bang not operator !a and raise error for a! factorial operator in Hive dialect --- src/ast/operator.rs | 3 +++ src/dialect/hive.rs | 4 +++ src/dialect/mod.rs | 10 +++++++ src/dialect/postgresql.rs | 5 ++++ src/parser/mod.rs | 55 ++++++++++++++++++++++++++++++++++----- tests/sqlparser_common.rs | 46 ++++++++++++++++++++++++++++++++ 6 files changed, 117 insertions(+), 6 deletions(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index c3bb379d6..e44ea2bf4 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -51,6 +51,8 @@ pub enum UnaryOperator { PGPrefixFactorial, /// Absolute value, e.g. `@ -9` (PostgreSQL-specific) PGAbs, + /// Unary logical not operator: e.g. `! false` (Hive-specific) + BangNot, } impl fmt::Display for UnaryOperator { @@ -65,6 +67,7 @@ impl fmt::Display for UnaryOperator { UnaryOperator::PGPostfixFactorial => "!", UnaryOperator::PGPrefixFactorial => "!!", UnaryOperator::PGAbs => "@", + UnaryOperator::BangNot => "!", }) } } diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 63642b33c..7f180c239 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -51,4 +51,8 @@ impl Dialect for HiveDialect { fn require_interval_qualifier(&self) -> bool { true } + + fn supports_bang_not_operator(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 453fee3de..ede723eaf 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -575,6 +575,11 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports `a!` expressions + fn supports_factorial_operator(&self) -> bool { + false + } + /// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem` /// as an alias assignment operator, rather than a boolean expression. /// For example: the following statements are equivalent for such a dialect: @@ -591,6 +596,11 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports `!a` expressions + fn supports_bang_not_operator(&self) -> bool { + false + } + /// Returns true if the dialect supports the `LISTEN` statement fn supports_listen(&self) -> bool { false diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index c40c826c4..72841c604 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -201,6 +201,11 @@ impl Dialect for PostgreSqlDialect { fn supports_notify(&self) -> bool { true } + + /// see + fn supports_factorial_operator(&self) -> bool { + true + } } pub fn parse_comment(parser: &mut Parser) -> Result { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 942ff19fd..40e9a95d9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1194,6 +1194,14 @@ impl<'a> Parser<'a> { ), }) } + Token::ExclamationMark if self.dialect.supports_bang_not_operator() => { + Ok(Expr::UnaryOp { + op: UnaryOperator::BangNot, + expr: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?, + ), + }) + } tok @ Token::DoubleExclamationMark | tok @ Token::PGSquareRoot | tok @ Token::PGCubeRoot @@ -1287,7 +1295,6 @@ impl<'a> Parser<'a> { } _ => self.expected("an expression", next_token), }?; - if self.parse_keyword(Keyword::COLLATE) { Ok(Expr::Collate { expr: Box::new(expr), @@ -2047,6 +2054,13 @@ impl<'a> Parser<'a> { } } + pub fn parse_bang_not(&mut self) -> Result { + Ok(Expr::UnaryOp { + op: UnaryOperator::BangNot, + expr: Box::new(self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?), + }) + } + /// Parses fulltext expressions [`sqlparser::ast::Expr::MatchAgainst`] /// /// # Errors @@ -2819,11 +2833,40 @@ impl<'a> Parser<'a> { format: None, }) } else if Token::ExclamationMark == tok { - // PostgreSQL factorial operation - Ok(Expr::UnaryOp { - op: UnaryOperator::PGPostfixFactorial, - expr: Box::new(expr), - }) + if self.dialect.supports_factorial_operator() { + match expr { + Expr::Value(_) | Expr::Identifier(_) | Expr::Nested(_) | Expr::BinaryOp{..} => Ok(Expr::UnaryOp { + op: UnaryOperator::PGPostfixFactorial, + expr: Box::new(expr), + }), + _ => { + self.expected( + "Value or Identifier or Nested or BinaryOp struct before factorial operator(!)", self.peek_token()) + }, + } + } else if self.dialect.supports_bang_not_operator() { + let token = self.next_token(); + match token.token { + Token::Word(_) | Token::Number(..) => Ok(Expr::UnaryOp { + op: UnaryOperator::BangNot, + expr: Box::new(expr), + }), + _ => { + parser_err!( + "current dialect support bang not operator, but with wrong synx", + tok.location + ) + } + } + } else { + parser_err!( + format!( + "current dialect: {:?} does not support factorial operator or bang not operator", + self.dialect + ), + self.peek_token().location + ) + } } else if Token::LBracket == tok { if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { self.parse_subscript(expr) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e37280636..d6333d2f5 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -11532,3 +11532,49 @@ fn test_select_top() { dialects.verified_stmt("SELECT TOP 3 DISTINCT * FROM tbl"); dialects.verified_stmt("SELECT TOP 3 DISTINCT a, b, c FROM tbl"); } + +#[test] +fn parse_bang_not() { + let dialects = all_dialects_where(|d| d.supports_bang_not_operator()); + let sql = "SELECT !a, !(b > 3)"; + let Select { projection, .. } = dialects.verified_only_select(sql); + + for (i, (op, expr)) in [ + ( + UnaryOperator::BangNot, + Box::new(Expr::Identifier(Ident::new("a"))), + ), + ( + UnaryOperator::BangNot, + Box::new(Expr::Nested(Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("b"))), + op: BinaryOperator::Gt, + right: Box::new(Expr::Value(Value::Number("3".parse().unwrap(), false))), + }))), + ), + ] + .into_iter() + .enumerate() + { + assert_eq!( + SelectItem::UnnamedExpr(Expr::UnaryOp { op: op, expr }), + projection[i] + ) + } + + let sql = "SELECT a!"; + assert_eq!( + dialects.parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError( + "current dialect support bang not operator, but with wrong synx".to_string() + ) + ); + + let sql = "SELECT !a"; + assert_eq!( + all_dialects_where(|d| !d.supports_bang_not_operator()) + .parse_sql_statements(sql) + .unwrap_err(), + ParserError::ParserError("Expected: an expression, found: !".to_string()) + ); +} From 773d3692d4f5ab178c341a62c4fbcc83474c2251 Mon Sep 17 00:00:00 2001 From: wugeer <1284057728@qq.com> Date: Wed, 30 Oct 2024 13:38:30 +0800 Subject: [PATCH 2/5] Update src/dialect/mod.rs Good Co-authored-by: Ifeanyi Ubah --- src/dialect/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index ede723eaf..7592740ca 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -596,7 +596,7 @@ pub trait Dialect: Debug + Any { false } - /// Returns true if the dialect supports `!a` expressions + /// Returns true if the dialect supports `!a` syntax for boolean `NOT` expressions. fn supports_bang_not_operator(&self) -> bool { false } From 15904d2b3c3174d08b33ede2f2c1d5ddcf81b478 Mon Sep 17 00:00:00 2001 From: wugeer <1284057728@qq.com> Date: Wed, 30 Oct 2024 22:25:28 +0800 Subject: [PATCH 3/5] Modify the code according to the recommendations and add more test cases --- src/dialect/hive.rs | 1 + src/parser/mod.rs | 61 +++++++++++++++------------------------ tests/sqlparser_common.rs | 33 ++++++++++++--------- 3 files changed, 43 insertions(+), 52 deletions(-) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 7f180c239..b97bf69be 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -52,6 +52,7 @@ impl Dialect for HiveDialect { true } + /// See Hive fn supports_bang_not_operator(&self) -> bool { true } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 40e9a95d9..8561bcc31 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2054,13 +2054,6 @@ impl<'a> Parser<'a> { } } - pub fn parse_bang_not(&mut self) -> Result { - Ok(Expr::UnaryOp { - op: UnaryOperator::BangNot, - expr: Box::new(self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?), - }) - } - /// Parses fulltext expressions [`sqlparser::ast::Expr::MatchAgainst`] /// /// # Errors @@ -2832,40 +2825,32 @@ impl<'a> Parser<'a> { data_type: self.parse_data_type()?, format: None, }) - } else if Token::ExclamationMark == tok { - if self.dialect.supports_factorial_operator() { - match expr { - Expr::Value(_) | Expr::Identifier(_) | Expr::Nested(_) | Expr::BinaryOp{..} => Ok(Expr::UnaryOp { - op: UnaryOperator::PGPostfixFactorial, - expr: Box::new(expr), - }), - _ => { - self.expected( - "Value or Identifier or Nested or BinaryOp struct before factorial operator(!)", self.peek_token()) - }, - } - } else if self.dialect.supports_bang_not_operator() { - let token = self.next_token(); - match token.token { - Token::Word(_) | Token::Number(..) => Ok(Expr::UnaryOp { + } else if Token::ExclamationMark == tok && self.dialect.supports_factorial_operator() { + Ok(Expr::UnaryOp { + op: UnaryOperator::PGPostfixFactorial, + expr: Box::new(expr), + }) + } else if Token::ExclamationMark == tok && self.dialect.supports_bang_not_operator() { + let token = self.next_token(); + match token.token { + Token::Word(_) | Token::Number(..) + if !matches!( + expr, + Expr::Value(_) + | Expr::Identifier(_) + | Expr::Nested(_) + | Expr::BinaryOp { .. } + ) => + { + Ok(Expr::UnaryOp { op: UnaryOperator::BangNot, expr: Box::new(expr), - }), - _ => { - parser_err!( - "current dialect support bang not operator, but with wrong synx", - tok.location - ) - } + }) } - } else { - parser_err!( - format!( - "current dialect: {:?} does not support factorial operator or bang not operator", - self.dialect - ), - self.peek_token().location - ) + _ => parser_err!( + "current dialect support bang not operator, but with wrong syntax", + tok.location + ), } } else if Token::LBracket == tok { if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index d6333d2f5..4078955c0 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -11562,19 +11562,24 @@ fn parse_bang_not() { ) } - let sql = "SELECT a!"; - assert_eq!( - dialects.parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError( - "current dialect support bang not operator, but with wrong synx".to_string() - ) - ); + let sql_statements = ["SELECT a!", "SELECT a ! b", "SELECT a ! as b"]; - let sql = "SELECT !a"; - assert_eq!( - all_dialects_where(|d| !d.supports_bang_not_operator()) - .parse_sql_statements(sql) - .unwrap_err(), - ParserError::ParserError("Expected: an expression, found: !".to_string()) - ); + for &sql in &sql_statements { + assert_eq!( + dialects.parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError( + "current dialect support bang not operator, but with wrong syntax".to_string() + ) + ); + } + + let sql_statements = ["SELECT !a", "SELECT !a b", "SELECT !a as b"]; + let dialects = all_dialects_where(|d| !d.supports_bang_not_operator()); + + for &sql in &sql_statements { + assert_eq!( + dialects.parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError("Expected: an expression, found: !".to_string()) + ); + } } From 16c76e2799236bae89cec22f4b278a2c9c7238b3 Mon Sep 17 00:00:00 2001 From: wugeer <1284057728@qq.com> Date: Thu, 31 Oct 2024 20:51:44 +0800 Subject: [PATCH 4/5] add `parse_factorial_operator` test and optimize redundant code --- tests/sqlparser_common.rs | 91 +++++++++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 14 deletions(-) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 4078955c0..90cc2a2a2 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -11539,25 +11539,22 @@ fn parse_bang_not() { let sql = "SELECT !a, !(b > 3)"; let Select { projection, .. } = dialects.verified_only_select(sql); - for (i, (op, expr)) in [ - ( - UnaryOperator::BangNot, - Box::new(Expr::Identifier(Ident::new("a"))), - ), - ( - UnaryOperator::BangNot, - Box::new(Expr::Nested(Box::new(Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("b"))), - op: BinaryOperator::Gt, - right: Box::new(Expr::Value(Value::Number("3".parse().unwrap(), false))), - }))), - ), + for (i, expr) in [ + Box::new(Expr::Identifier(Ident::new("a"))), + Box::new(Expr::Nested(Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("b"))), + op: BinaryOperator::Gt, + right: Box::new(Expr::Value(Value::Number("3".parse().unwrap(), false))), + }))), ] .into_iter() .enumerate() { assert_eq!( - SelectItem::UnnamedExpr(Expr::UnaryOp { op: op, expr }), + SelectItem::UnnamedExpr(Expr::UnaryOp { + op: UnaryOperator::BangNot, + expr + }), projection[i] ) } @@ -11583,3 +11580,69 @@ fn parse_bang_not() { ); } } + +#[test] +fn parse_factorial_operator() { + let dialects = all_dialects_where(|d| d.supports_factorial_operator()); + let sql = "SELECT a!, (b + c)!"; + let Select { projection, .. } = dialects.verified_only_select(sql); + + for (i, expr) in [ + Box::new(Expr::Identifier(Ident::new("a"))), + Box::new(Expr::Nested(Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("b"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Identifier(Ident::new("c"))), + }))), + ] + .into_iter() + .enumerate() + { + assert_eq!( + SelectItem::UnnamedExpr(Expr::UnaryOp { + op: UnaryOperator::PGPostfixFactorial, + expr + }), + projection[i] + ) + } + + let sql_statements = ["SELECT !a", "SELECT !a b", "SELECT !a as b"]; + + for &sql in &sql_statements { + assert_eq!( + dialects.parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError("Expected: an expression, found: !".to_string()) + ); + } + + let sql_statements = ["SELECT a!", "SELECT a ! b", "SELECT a ! as b"]; + + // Due to the exclamation mark, which is both part of the `bang not` operator + // and the `factorial` operator, additional filtering not supports + // `bang not` operator is required here. + let dialects = + all_dialects_where(|d| !d.supports_factorial_operator() && !d.supports_bang_not_operator()); + + for &sql in &sql_statements { + assert_eq!( + dialects.parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError("No infix parser for token ExclamationMark".to_string()) + ); + } + + // Due to the exclamation mark, which is both part of the `bang not` operator + // and the `factorial` operator, additional filtering supports + // `bang not` operator is required here. + let dialects = + all_dialects_where(|d| !d.supports_factorial_operator() && d.supports_bang_not_operator()); + + for &sql in &sql_statements { + assert_eq!( + dialects.parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError( + "current dialect support bang not operator, but with wrong syntax".to_string() + ) + ); + } +} From 38a4ea71c6e244c2a83377d1d3f1d8b5604bb8e4 Mon Sep 17 00:00:00 2001 From: wugeer <1284057728@qq.com> Date: Tue, 5 Nov 2024 22:08:37 +0800 Subject: [PATCH 5/5] fix conflicts --- src/parser/mod.rs | 22 ---------------------- tests/sqlparser_common.rs | 12 ++++-------- 2 files changed, 4 insertions(+), 30 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8561bcc31..e329c0177 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2830,28 +2830,6 @@ impl<'a> Parser<'a> { op: UnaryOperator::PGPostfixFactorial, expr: Box::new(expr), }) - } else if Token::ExclamationMark == tok && self.dialect.supports_bang_not_operator() { - let token = self.next_token(); - match token.token { - Token::Word(_) | Token::Number(..) - if !matches!( - expr, - Expr::Value(_) - | Expr::Identifier(_) - | Expr::Nested(_) - | Expr::BinaryOp { .. } - ) => - { - Ok(Expr::UnaryOp { - op: UnaryOperator::BangNot, - expr: Box::new(expr), - }) - } - _ => parser_err!( - "current dialect support bang not operator, but with wrong syntax", - tok.location - ), - } } else if Token::LBracket == tok { if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { self.parse_subscript(expr) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 90cc2a2a2..84f2f718b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -11564,9 +11564,7 @@ fn parse_bang_not() { for &sql in &sql_statements { assert_eq!( dialects.parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError( - "current dialect support bang not operator, but with wrong syntax".to_string() - ) + ParserError::ParserError("No infix parser for token ExclamationMark".to_string()) ); } @@ -11620,7 +11618,7 @@ fn parse_factorial_operator() { // Due to the exclamation mark, which is both part of the `bang not` operator // and the `factorial` operator, additional filtering not supports - // `bang not` operator is required here. + // `bang not` operator is required here. let dialects = all_dialects_where(|d| !d.supports_factorial_operator() && !d.supports_bang_not_operator()); @@ -11633,16 +11631,14 @@ fn parse_factorial_operator() { // Due to the exclamation mark, which is both part of the `bang not` operator // and the `factorial` operator, additional filtering supports - // `bang not` operator is required here. + // `bang not` operator is required here. let dialects = all_dialects_where(|d| !d.supports_factorial_operator() && d.supports_bang_not_operator()); for &sql in &sql_statements { assert_eq!( dialects.parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError( - "current dialect support bang not operator, but with wrong syntax".to_string() - ) + ParserError::ParserError("No infix parser for token ExclamationMark".to_string()) ); } }