From 2d26599d62bb703943b82a00e0138fa676e37e76 Mon Sep 17 00:00:00 2001 From: wugeer <1284057728@qq.com> Date: Sun, 2 Feb 2025 12:20:10 +0800 Subject: [PATCH 1/4] Add supports for Hive's `GROUP BY .. GROUPING SETS` syntax and supports for Clickhouse's `GROUP BY GROUPING SETS` syntax --- src/ast/query.rs | 20 +++++++- src/dialect/clickhouse.rs | 10 ++++ src/dialect/generic.rs | 8 +++ src/dialect/hive.rs | 16 ++++-- src/dialect/mod.rs | 10 ++++ src/parser/mod.rs | 12 ++++- tests/sqlparser_clickhouse.rs | 55 -------------------- tests/sqlparser_common.rs | 95 +++++++++++++++++++++++++++++++++++ 8 files changed, 166 insertions(+), 60 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 239e14554..61daafca0 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2522,13 +2522,18 @@ impl fmt::Display for SelectInto { /// e.g. GROUP BY year WITH ROLLUP WITH TOTALS /// /// [ClickHouse]: -#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GroupByWithModifier { Rollup, Cube, Totals, + /// Hive supports GROUP BY GROUPING SETS syntax. + /// e.g. GROUP BY year , month GROUPING SETS((year,month),(year),(month)) + /// + /// [Hive]: + GroupingSets(Expr), } impl fmt::Display for GroupByWithModifier { @@ -2537,6 +2542,19 @@ impl fmt::Display for GroupByWithModifier { GroupByWithModifier::Rollup => write!(f, "WITH ROLLUP"), GroupByWithModifier::Cube => write!(f, "WITH CUBE"), GroupByWithModifier::Totals => write!(f, "WITH TOTALS"), + GroupByWithModifier::GroupingSets(expr) => match expr { + Expr::GroupingSets(sets) => { + write!(f, "GROUPING SETS (")?; + let mut sep = ""; + for set in sets { + write!(f, "{sep}")?; + sep = ", "; + write!(f, "({})", display_comma_separated(set))?; + } + write!(f, ")") + } + _ => unreachable!(), + }, } } } diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 9a0884a51..dbc89a028 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -75,4 +75,14 @@ impl Dialect for ClickHouseDialect { fn supports_lambda_functions(&self) -> bool { true } + + // See + fn supports_group_by_expr(&self) -> bool { + true + } + + /// See + fn supports_group_by_with_modifier(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 4021b5753..6b3839089 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -48,6 +48,14 @@ impl Dialect for GenericDialect { true } + fn supports_group_by_with_modifier(&self) -> bool { + true + } + + fn supports_group_by_special_grouping_sets(&self) -> bool { + true + } + fn supports_connect_by(&self) -> bool { true } diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 80f44cf7c..0f8f3a1c2 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -52,18 +52,28 @@ impl Dialect for HiveDialect { true } - /// See Hive + /// See fn supports_bang_not_operator(&self) -> bool { true } - /// See Hive + /// See fn supports_load_data(&self) -> bool { true } - /// See Hive + /// See fn supports_table_sample_before_alias(&self) -> bool { true } + + /// See + fn supports_group_by_with_modifier(&self) -> bool { + true + } + + /// See + fn supports_group_by_special_grouping_sets(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index b648869d2..173e00957 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -245,6 +245,16 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialects supports `with rollup/cube/all` expressions. + fn supports_group_by_with_modifier(&self) -> bool { + false + } + + /// Returns true if the dialects supports `group by .. grouping sets` expressions. + fn supports_group_by_special_grouping_sets(&self) -> bool { + false + } + /// Returns true if the dialect supports CONNECT BY. fn supports_connect_by(&self) -> bool { false diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ca858c42e..cf3173a13 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9069,7 +9069,7 @@ impl<'a> Parser<'a> { }; let mut modifiers = vec![]; - if dialect_of!(self is ClickHouseDialect | GenericDialect) { + if self.dialect.supports_group_by_with_modifier() { loop { if !self.parse_keyword(Keyword::WITH) { break; @@ -9092,6 +9092,16 @@ impl<'a> Parser<'a> { }); } } + if self.dialect.supports_group_by_special_grouping_sets() + && self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) + { + self.expect_token(&Token::LParen)?; + let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; + self.expect_token(&Token::RParen)?; + modifiers.push(GroupByWithModifier::GroupingSets(Expr::GroupingSets( + result, + ))); + }; let group_by = match expressions { None => GroupByExpr::All(modifiers), Some(exprs) => GroupByExpr::Expressions(exprs, modifiers), diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 0f22db389..a27d81480 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1068,61 +1068,6 @@ fn parse_create_materialized_view() { clickhouse_and_generic().verified_stmt(sql); } -#[test] -fn parse_group_by_with_modifier() { - let clauses = ["x", "a, b", "ALL"]; - let modifiers = [ - "WITH ROLLUP", - "WITH CUBE", - "WITH TOTALS", - "WITH ROLLUP WITH CUBE", - ]; - let expected_modifiers = [ - vec![GroupByWithModifier::Rollup], - vec![GroupByWithModifier::Cube], - vec![GroupByWithModifier::Totals], - vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube], - ]; - for clause in &clauses { - for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) { - let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}"); - match clickhouse_and_generic().verified_stmt(&sql) { - Statement::Query(query) => { - let group_by = &query.body.as_select().unwrap().group_by; - if clause == &"ALL" { - assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec())); - } else { - assert_eq!( - group_by, - &GroupByExpr::Expressions( - clause - .split(", ") - .map(|c| Identifier(Ident::new(c))) - .collect(), - expected_modifier.to_vec() - ) - ); - } - } - _ => unreachable!(), - } - } - } - - // invalid cases - let invalid_cases = [ - "SELECT * FROM t GROUP BY x WITH", - "SELECT * FROM t GROUP BY x WITH ROLLUP CUBE", - "SELECT * FROM t GROUP BY x WITH WITH ROLLUP", - "SELECT * FROM t GROUP BY WITH ROLLUP", - ]; - for sql in invalid_cases { - clickhouse_and_generic() - .parse_sql_statements(sql) - .expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH"); - } -} - #[test] fn parse_select_order_by_with_fill_interpolate() { let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 \ diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6113a3703..43167c130 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2446,6 +2446,101 @@ fn parse_select_group_by_all() { ); } +#[test] +fn parse_group_by_with_modifier() { + let clauses = ["x", "a, b", "ALL"]; + let modifiers = [ + "WITH ROLLUP", + "WITH CUBE", + "WITH TOTALS", + "WITH ROLLUP WITH CUBE", + ]; + let expected_modifiers = [ + vec![GroupByWithModifier::Rollup], + vec![GroupByWithModifier::Cube], + vec![GroupByWithModifier::Totals], + vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube], + ]; + let dialects = all_dialects_where(|d| d.supports_group_by_with_modifier()); + + for clause in &clauses { + for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) { + let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}"); + match dialects.verified_stmt(&sql) { + Statement::Query(query) => { + let group_by = &query.body.as_select().unwrap().group_by; + if clause == &"ALL" { + assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec())); + } else { + assert_eq!( + group_by, + &GroupByExpr::Expressions( + clause + .split(", ") + .map(|c| Identifier(Ident::new(c))) + .collect(), + expected_modifier.to_vec() + ) + ); + } + } + _ => unreachable!(), + } + } + } + + // invalid cases + let invalid_cases = [ + "SELECT * FROM t GROUP BY x WITH", + "SELECT * FROM t GROUP BY x WITH ROLLUP CUBE", + "SELECT * FROM t GROUP BY x WITH WITH ROLLUP", + "SELECT * FROM t GROUP BY WITH ROLLUP", + ]; + for sql in invalid_cases { + dialects + .parse_sql_statements(sql) + .expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH"); + } +} + +#[test] +fn parse_select_grouping_sets() { + let dialects = all_dialects_where(|d| d.supports_group_by_special_grouping_sets()); + + let sql = "SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ())"; + match dialects.verified_stmt(sql) { + Statement::Query(query) => { + let group_by = &query.body.as_select().unwrap().group_by; + assert_eq!( + group_by, + &GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("a")), + Expr::Identifier(Ident::new("b")) + ], + vec![GroupByWithModifier::GroupingSets(Expr::GroupingSets(vec![ + vec![ + Expr::Identifier(Ident::new("a")), + Expr::Identifier(Ident::new("b")) + ], + vec![Expr::Identifier(Ident::new("a")),], + vec![Expr::Identifier(Ident::new("b"))], + vec![] + ]))] + ) + ); + } + _ => unreachable!(), + } + + let dialects = all_dialects_where(|d| !d.supports_group_by_special_grouping_sets()); + + assert_eq!( + dialects.parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError("Expected: end of statement, found: GROUPING".to_string()) + ); +} + #[test] fn parse_select_having() { let sql = "SELECT foo FROM bar GROUP BY foo HAVING COUNT(*) > 1"; From 07bc7e1354a5253a783c6f0e00cd9fef23600c16 Mon Sep 17 00:00:00 2001 From: wugeer <1284057728@qq.com> Date: Sun, 2 Feb 2025 12:25:34 +0800 Subject: [PATCH 2/4] rename test code --- tests/sqlparser_common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 43167c130..61096239a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2504,7 +2504,7 @@ fn parse_group_by_with_modifier() { } #[test] -fn parse_select_grouping_sets() { +fn parse_group_by_special_grouping_sets() { let dialects = all_dialects_where(|d| d.supports_group_by_special_grouping_sets()); let sql = "SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ())"; From 95f989e915c658080e9f6114b221768b4666fcf1 Mon Sep 17 00:00:00 2001 From: wugeer <1284057728@qq.com> Date: Thu, 13 Feb 2025 21:34:09 +0800 Subject: [PATCH 3/4] Update src/dialect/mod.rs more doc Co-authored-by: Ifeanyi Ubah --- src/dialect/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 173e00957..2755569c4 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -245,7 +245,8 @@ pub trait Dialect: Debug + Any { false } - /// Returns true if the dialects supports `with rollup/cube/all` expressions. + /// Returns true if the dialects supports `GROUP BY` modifiers prefixed by a `WITH` keyword. + /// Example: `GROUP BY value WITH ROLLUP`. fn supports_group_by_with_modifier(&self) -> bool { false } From 76720babfd751b9329d477c79967057885d0d293 Mon Sep 17 00:00:00 2001 From: wugeer <1284057728@qq.com> Date: Thu, 13 Feb 2025 21:40:48 +0800 Subject: [PATCH 4/4] more rustc --- src/ast/query.rs | 16 +++------------- src/dialect/generic.rs | 4 ---- src/dialect/hive.rs | 5 ----- src/dialect/mod.rs | 5 ----- src/parser/mod.rs | 4 +--- tests/sqlparser_common.rs | 11 +---------- 6 files changed, 5 insertions(+), 40 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 6d22d50fb..097a4ad4c 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2567,19 +2567,9 @@ impl fmt::Display for GroupByWithModifier { GroupByWithModifier::Rollup => write!(f, "WITH ROLLUP"), GroupByWithModifier::Cube => write!(f, "WITH CUBE"), GroupByWithModifier::Totals => write!(f, "WITH TOTALS"), - GroupByWithModifier::GroupingSets(expr) => match expr { - Expr::GroupingSets(sets) => { - write!(f, "GROUPING SETS (")?; - let mut sep = ""; - for set in sets { - write!(f, "{sep}")?; - sep = ", "; - write!(f, "({})", display_comma_separated(set))?; - } - write!(f, ")") - } - _ => unreachable!(), - }, + GroupByWithModifier::GroupingSets(expr) => { + write!(f, "{expr}") + } } } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index cc3213af9..e04a288d6 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -52,10 +52,6 @@ impl Dialect for GenericDialect { true } - fn supports_group_by_special_grouping_sets(&self) -> bool { - true - } - fn supports_connect_by(&self) -> bool { true } diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 0f8f3a1c2..3e15d395b 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -71,9 +71,4 @@ impl Dialect for HiveDialect { fn supports_group_by_with_modifier(&self) -> bool { true } - - /// See - fn supports_group_by_special_grouping_sets(&self) -> bool { - true - } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 335930e43..031fe9676 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -251,11 +251,6 @@ pub trait Dialect: Debug + Any { false } - /// Returns true if the dialects supports `group by .. grouping sets` expressions. - fn supports_group_by_special_grouping_sets(&self) -> bool { - false - } - /// Returns true if the dialect supports CONNECT BY. fn supports_connect_by(&self) -> bool { false diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4c85d309d..7c03c4e7e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9171,9 +9171,7 @@ impl<'a> Parser<'a> { }); } } - if self.dialect.supports_group_by_special_grouping_sets() - && self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) - { + if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { self.expect_token(&Token::LParen)?; let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; self.expect_token(&Token::RParen)?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 09725f8d0..e4fa2e837 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2506,10 +2506,8 @@ fn parse_group_by_with_modifier() { #[test] fn parse_group_by_special_grouping_sets() { - let dialects = all_dialects_where(|d| d.supports_group_by_special_grouping_sets()); - let sql = "SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ())"; - match dialects.verified_stmt(sql) { + match all_dialects().verified_stmt(sql) { Statement::Query(query) => { let group_by = &query.body.as_select().unwrap().group_by; assert_eq!( @@ -2533,13 +2531,6 @@ fn parse_group_by_special_grouping_sets() { } _ => unreachable!(), } - - let dialects = all_dialects_where(|d| !d.supports_group_by_special_grouping_sets()); - - assert_eq!( - dialects.parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError("Expected: end of statement, found: GROUPING".to_string()) - ); } #[test]