From ceb949634eda07e1c97d1edbab0e1dcd92402011 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 5 Jun 2025 16:47:26 +0200 Subject: [PATCH 01/20] support rename --- src/ast/query.rs | 9 +++++++++ src/parser/mod.rs | 12 ++++++++++++ tests/sqlparser_common.rs | 8 ++++++++ 3 files changed, 29 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index ffe1e4023..b1be3084f 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2684,6 +2684,12 @@ pub enum PipeOperator { /// Syntax: `|> TABLESAMPLE SYSTEM (10 PERCENT) /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#tablesample_pipe_operator> TableSample { sample: Box<TableSample> }, + /// Renames columns in the input table. + /// + /// Syntax: `|> RENAME old_name AS new_name, ...` + /// + /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#rename_pipe_operator> + Rename { mappings: Vec<IdentWithAlias> }, } impl fmt::Display for PipeOperator { @@ -2739,6 +2745,9 @@ impl fmt::Display for PipeOperator { PipeOperator::TableSample { sample } => { write!(f, "{}", sample) } + PipeOperator::Rename { mappings } => { + write!(f, "RENAME {}", display_comma_separated(mappings)) + } } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3e721072b..887fb53cb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9947,6 +9947,13 @@ impl<'a> Parser<'a> { Ok(IdentWithAlias { ident, alias }) } + pub fn parse_identifier_with_optional_alias(&mut self) -> Result<IdentWithAlias, ParserError> { + let ident = self.parse_identifier()?; + let _after_as = self.parse_keyword(Keyword::AS); + let alias = self.parse_identifier()?; + Ok(IdentWithAlias { ident, alias }) + } + /// Optionally parses an alias for a select list item fn maybe_parse_select_item_alias(&mut self) -> Result<Option<Ident>, ParserError> { fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { @@ -11076,6 +11083,7 @@ impl<'a> Parser<'a> { Keyword::AGGREGATE, Keyword::ORDER, Keyword::TABLESAMPLE, + Keyword::RENAME, ])?; match kw { Keyword::SELECT => { @@ -11142,6 +11150,10 @@ impl<'a> Parser<'a> { let sample = self.parse_table_sample(TableSampleModifier::TableSample)?; pipe_operators.push(PipeOperator::TableSample 
{ sample }); } + Keyword::RENAME => { + let mappings = self.parse_comma_separated(Parser::parse_identifier_with_optional_alias)?; + pipe_operators.push(PipeOperator::Rename { mappings }); + } unhandled => { return Err(ParserError::ParserError(format!( "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}" diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a1a8fc3b3..281d352ff 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15193,6 +15193,14 @@ fn parse_pipeline_operator() { dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50 PERCENT)"); dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); + // rename pipe operator + dialects.verified_stmt("SELECT * FROM users |> RENAME old_name AS new_name"); + dialects.verified_stmt("SELECT * FROM users |> RENAME id AS user_id, name AS user_name"); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> RENAME id user_id", + "SELECT * FROM users |> RENAME id AS user_id", + ); + // many pipes dialects.verified_stmt( "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", From 365f7f7f9751a4c5ba70d17979f40d1f09b0e069 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 5 Jun 2025 17:14:05 +0200 Subject: [PATCH 02/20] support union --- src/ast/query.rs | 28 ++++++++++++++++++++++++++++ src/parser/mod.rs | 14 ++++++++++++++ tests/sqlparser_common.rs | 18 ++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index b1be3084f..998a1e565 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2690,6 +2690,15 @@ pub enum PipeOperator { /// /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#rename_pipe_operator> Rename { mappings: Vec<IdentWithAlias> }, + /// Combines the input table with one or more tables 
using UNION. + /// + /// Syntax: `|> UNION [ALL|DISTINCT] (<query>), (<query>), ...` + /// + /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#union_pipe_operator> + Union { + set_quantifier: SetQuantifier, + queries: Vec<Box<Query>>, + }, } impl fmt::Display for PipeOperator { @@ -2748,6 +2757,25 @@ impl fmt::Display for PipeOperator { PipeOperator::Rename { mappings } => { write!(f, "RENAME {}", display_comma_separated(mappings)) } + PipeOperator::Union { set_quantifier, queries } => { + write!(f, "UNION")?; + match set_quantifier { + SetQuantifier::All => write!(f, " ALL")?, + SetQuantifier::Distinct => write!(f, " DISTINCT")?, + SetQuantifier::None => {}, + _ => { + write!(f, " {}", set_quantifier)?; + } + } + write!(f, " ")?; + for (i, query) in queries.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "({})", query)?; + } + Ok(()) + } } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 887fb53cb..6cbfeffca 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11084,6 +11084,7 @@ impl<'a> Parser<'a> { Keyword::ORDER, Keyword::TABLESAMPLE, Keyword::RENAME, + Keyword::UNION, ])?; match kw { Keyword::SELECT => { @@ -11154,6 +11155,19 @@ impl<'a> Parser<'a> { let mappings = self.parse_comma_separated(Parser::parse_identifier_with_optional_alias)?; pipe_operators.push(PipeOperator::Rename { mappings }); } + Keyword::UNION => { + // Reuse existing set quantifier parser for consistent BY NAME support + let set_quantifier = self.parse_set_quantifier(&Some(SetOperator::Union)); + // BigQuery UNION pipe operator requires parentheses around queries + // Parse comma-separated list of parenthesized queries + let queries = self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let query = parser.parse_query()?; + parser.expect_token(&Token::RParen)?; + Ok(query) + })?; + pipe_operators.push(PipeOperator::Union { set_quantifier, queries }); + } unhandled => { return Err(ParserError::ParserError(format!( "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}" diff --git 
a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 281d352ff..9fa9b5d0f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15201,6 +15201,24 @@ fn parse_pipeline_operator() { "SELECT * FROM users |> RENAME id AS user_id", ); + // union pipe operator + dialects.verified_stmt("SELECT * FROM users |> UNION ALL (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION (SELECT * FROM admins)"); + + // union pipe operator with multiple queries + dialects.verified_stmt("SELECT * FROM users |> UNION ALL (SELECT * FROM admins), (SELECT * FROM guests)"); + dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT (SELECT * FROM admins), (SELECT * FROM guests), (SELECT * FROM employees)"); + dialects.verified_stmt("SELECT * FROM users |> UNION (SELECT * FROM admins), (SELECT * FROM guests)"); + + // union pipe operator with BY NAME modifier + dialects.verified_stmt("SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION ALL BY NAME (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT BY NAME (SELECT * FROM admins)"); + + // union pipe operator with BY NAME and multiple queries + dialects.verified_stmt("SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + // many pipes dialects.verified_stmt( "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", From 3dbdadcc33695fe991fbd8a1e05479af4eefd334 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 5 Jun 2025 17:32:34 +0200 Subject: [PATCH 03/20] support intersect --- src/ast/query.rs | 40 
++++++++++++++++++++++++++++++++++++--- src/parser/mod.rs | 14 ++++++++++++++ tests/sqlparser_common.rs | 16 ++++++++++++++++ 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 998a1e565..c043ba40c 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2695,7 +2695,16 @@ pub enum PipeOperator { /// Syntax: `|> UNION [ALL|DISTINCT] (<query>), (<query>), ...` /// /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#union_pipe_operator> - Union { + Union { + set_quantifier: SetQuantifier, + queries: Vec<Box<Query>>, + }, + /// Returns only the rows that are present in both the input table and the specified tables. + /// + /// Syntax: `|> INTERSECT [DISTINCT] (<query>), (<query>), ...` + /// + /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#intersect_pipe_operator> + Intersect { set_quantifier: SetQuantifier, queries: Vec<Box<Query>>, }, @@ -2757,12 +2766,37 @@ impl fmt::Display for PipeOperator { PipeOperator::Rename { mappings } => { write!(f, "RENAME {}", display_comma_separated(mappings)) } - PipeOperator::Union { set_quantifier, queries } => { + PipeOperator::Union { + set_quantifier, + queries, + } => { write!(f, "UNION")?; match set_quantifier { SetQuantifier::All => write!(f, " ALL")?, SetQuantifier::Distinct => write!(f, " DISTINCT")?, - SetQuantifier::None => {}, + SetQuantifier::None => {} + _ => { + write!(f, " {}", set_quantifier)?; + } + } + write!(f, " ")?; + for (i, query) in queries.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "({})", query)?; + } + Ok(()) + } + PipeOperator::Intersect { + set_quantifier, + queries, + } => { + write!(f, "INTERSECT")?; + match set_quantifier { + SetQuantifier::All => write!(f, " ALL")?, + SetQuantifier::Distinct => write!(f, " DISTINCT")?, + SetQuantifier::None => {} _ => { write!(f, " {}", set_quantifier)?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6cbfeffca..1b0fc5e08 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11085,6 +11085,7 @@ impl<'a> Parser<'a> { Keyword::TABLESAMPLE, Keyword::RENAME, Keyword::UNION, + Keyword::INTERSECT, ])?; match kw { Keyword::SELECT => { @@ 
-11168,6 +11169,19 @@ impl<'a> Parser<'a> { })?; pipe_operators.push(PipeOperator::Union { set_quantifier, queries }); } + Keyword::INTERSECT => { + // Reuse existing set quantifier parser for consistent modifier support + let set_quantifier = self.parse_set_quantifier(&Some(SetOperator::Intersect)); + // BigQuery INTERSECT pipe operator requires parentheses around queries + // Parse comma-separated list of parenthesized queries + let queries = self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let query = parser.parse_query()?; + parser.expect_token(&Token::RParen)?; + Ok(query) + })?; + pipe_operators.push(PipeOperator::Intersect { set_quantifier, queries }); + } unhandled => { return Err(ParserError::ParserError(format!( "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}" diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9fa9b5d0f..ddffce8f8 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15219,6 +15219,22 @@ fn parse_pipeline_operator() { // union pipe operator with BY NAME and multiple queries dialects.verified_stmt("SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + // intersect pipe operator (BigQuery does not support ALL modifier for INTERSECT) + dialects.verified_stmt("SELECT * FROM users |> INTERSECT (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins)"); + + // intersect pipe operator with BY NAME modifier + dialects.verified_stmt("SELECT * FROM users |> INTERSECT BY NAME (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins)"); + + // intersect pipe operator with multiple queries + dialects.verified_stmt("SELECT * FROM users |> INTERSECT (SELECT * FROM admins), (SELECT * FROM guests)"); + dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins), 
(SELECT * FROM guests)"); + + // intersect pipe operator with BY NAME and multiple queries + dialects.verified_stmt("SELECT * FROM users |> INTERSECT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + // many pipes dialects.verified_stmt( "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", From affe85096372e786bea714f41002f1ba513856f5 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 5 Jun 2025 17:53:56 +0200 Subject: [PATCH 04/20] support except --- src/ast/query.rs | 31 +++++++++++++++++++++++++++++ src/parser/mod.rs | 22 +++++++++++++++++++++ tests/sqlparser_common.rs | 41 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index c043ba40c..b8da6a136 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2708,6 +2708,15 @@ pub enum PipeOperator { set_quantifier: SetQuantifier, queries: Vec<Box<Query>>, }, + /// Returns only the rows that are present in the input table but not in the specified tables. 
+ /// + /// Syntax: `|> EXCEPT DISTINCT (<query>), (<query>), ...` + /// + /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#except_pipe_operator> + Except { + set_quantifier: SetQuantifier, + queries: Vec<Box<Query>>, + }, } impl fmt::Display for PipeOperator { @@ -2810,6 +2819,28 @@ impl fmt::Display for PipeOperator { } Ok(()) } + PipeOperator::Except { + set_quantifier, + queries, + } => { + write!(f, "EXCEPT")?; + match set_quantifier { + SetQuantifier::All => write!(f, " ALL")?, + SetQuantifier::Distinct => write!(f, " DISTINCT")?, + SetQuantifier::None => {} + _ => { + write!(f, " {}", set_quantifier)?; + } + } + write!(f, " ")?; + for (i, query) in queries.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "({})", query)?; + } + Ok(()) + } } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1b0fc5e08..88e1fdf16 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11086,6 +11086,7 @@ impl<'a> Parser<'a> { Keyword::RENAME, Keyword::UNION, Keyword::INTERSECT, + Keyword::EXCEPT, ])?; match kw { Keyword::SELECT => { @@ -11182,6 +11183,27 @@ impl<'a> Parser<'a> { })?; pipe_operators.push(PipeOperator::Intersect { set_quantifier, queries }); } + Keyword::EXCEPT => { + // BigQuery EXCEPT pipe operator requires DISTINCT modifier + let set_quantifier = if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + SetQuantifier::DistinctByName + } else if self.parse_keyword(Keyword::DISTINCT) { + SetQuantifier::Distinct + } else { + return Err(ParserError::ParserError( + "EXCEPT pipe operator requires DISTINCT modifier".to_string() + )); + }; + // BigQuery EXCEPT pipe operator requires parentheses around queries + // Parse comma-separated list of parenthesized queries + let queries = self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let query = parser.parse_query()?; + parser.expect_token(&Token::RParen)?; + Ok(query) + })?; + pipe_operators.push(PipeOperator::Except { set_quantifier, queries }); + } unhandled => { return Err(ParserError::ParserError(format!( 
"`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}" diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ddffce8f8..5e03a71f6 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15235,12 +15235,53 @@ fn parse_pipeline_operator() { dialects.verified_stmt("SELECT * FROM users |> INTERSECT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + // except pipe operator (BigQuery requires DISTINCT modifier for EXCEPT) + dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins)"); + + // except pipe operator with BY NAME modifier + dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins)"); + + // except pipe operator with multiple queries + dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)"); + + // except pipe operator with BY NAME and multiple queries + dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + // many pipes dialects.verified_stmt( "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", ); } +#[test] +fn parse_pipeline_operator_negative_tests() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + + // Test that plain EXCEPT without DISTINCT fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), + dialects.parse_sql_statements("SELECT * FROM users |> EXCEPT (SELECT * FROM admins)").unwrap_err() + ); + + // Test that EXCEPT ALL fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe 
operator requires DISTINCT modifier".to_string()), + dialects.parse_sql_statements("SELECT * FROM users |> EXCEPT ALL (SELECT * FROM admins)").unwrap_err() + ); + + // Test that EXCEPT BY NAME without DISTINCT fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), + dialects.parse_sql_statements("SELECT * FROM users |> EXCEPT BY NAME (SELECT * FROM admins)").unwrap_err() + ); + + // Test that EXCEPT ALL BY NAME fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), + dialects.parse_sql_statements("SELECT * FROM users |> EXCEPT ALL BY NAME (SELECT * FROM admins)").unwrap_err() + ); +} + #[test] fn parse_multiple_set_statements() -> Result<(), ParserError> { let dialects = all_dialects_where(|d| d.supports_comma_separated_set_assignments()); From e215e7f6d7b8f79e164db7344d877ed74ec0c81f Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 5 Jun 2025 17:56:47 +0200 Subject: [PATCH 05/20] intersect requires distinct --- src/parser/mod.rs | 12 ++++++++++-- tests/sqlparser_common.rs | 30 +++++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 88e1fdf16..dbc5a293e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11171,8 +11171,16 @@ impl<'a> Parser<'a> { pipe_operators.push(PipeOperator::Union { set_quantifier, queries }); } Keyword::INTERSECT => { - // Reuse existing set quantifier parser for consistent modifier support - let set_quantifier = self.parse_set_quantifier(&Some(SetOperator::Intersect)); + // BigQuery INTERSECT pipe operator requires DISTINCT modifier + let set_quantifier = if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + SetQuantifier::DistinctByName + } else if self.parse_keyword(Keyword::DISTINCT) { + SetQuantifier::Distinct + } else { + return Err(ParserError::ParserError( + "INTERSECT pipe operator requires DISTINCT 
modifier".to_string() + )); + }; // BigQuery INTERSECT pipe operator requires parentheses around queries // Parse comma-separated list of parenthesized queries let queries = self.parse_comma_separated(|parser| { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5e03a71f6..9d6a9f730 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15219,20 +15219,16 @@ fn parse_pipeline_operator() { // union pipe operator with BY NAME and multiple queries dialects.verified_stmt("SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); - // intersect pipe operator (BigQuery does not support ALL modifier for INTERSECT) - dialects.verified_stmt("SELECT * FROM users |> INTERSECT (SELECT * FROM admins)"); + // intersect pipe operator (BigQuery requires DISTINCT modifier for INTERSECT) dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins)"); // intersect pipe operator with BY NAME modifier - dialects.verified_stmt("SELECT * FROM users |> INTERSECT BY NAME (SELECT * FROM admins)"); dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins)"); // intersect pipe operator with multiple queries - dialects.verified_stmt("SELECT * FROM users |> INTERSECT (SELECT * FROM admins), (SELECT * FROM guests)"); dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)"); // intersect pipe operator with BY NAME and multiple queries - dialects.verified_stmt("SELECT * FROM users |> INTERSECT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); // except pipe operator (BigQuery requires DISTINCT modifier for EXCEPT) @@ -15280,6 +15276,30 @@ fn parse_pipeline_operator_negative_tests() { ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), 
dialects.parse_sql_statements("SELECT * FROM users |> EXCEPT ALL BY NAME (SELECT * FROM admins)").unwrap_err() ); + + // Test that plain INTERSECT without DISTINCT fails + assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects.parse_sql_statements("SELECT * FROM users |> INTERSECT (SELECT * FROM admins)").unwrap_err() + ); + + // Test that INTERSECT ALL fails + assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects.parse_sql_statements("SELECT * FROM users |> INTERSECT ALL (SELECT * FROM admins)").unwrap_err() + ); + + // Test that INTERSECT BY NAME without DISTINCT fails + assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects.parse_sql_statements("SELECT * FROM users |> INTERSECT BY NAME (SELECT * FROM admins)").unwrap_err() + ); + + // Test that INTERSECT ALL BY NAME fails + assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects.parse_sql_statements("SELECT * FROM users |> INTERSECT ALL BY NAME (SELECT * FROM admins)").unwrap_err() + ); } #[test] From 96eb232ba17f1f36ac7146d1e506035c098cfb75 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 5 Jun 2025 18:21:52 +0200 Subject: [PATCH 06/20] support call operator --- src/ast/query.rs | 13 ++++++++++ src/parser/mod.rs | 19 +++++++++++++++ tests/sqlparser_common.rs | 51 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index b8da6a136..1816094d8 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2717,6 +2717,12 @@ pub enum PipeOperator { set_quantifier: SetQuantifier, queries: Vec<Box<Query>>, }, + /// Calls a table function or procedure that returns a table. 
+ /// + /// Syntax: `|> CALL function_name(args) [AS alias]` + /// + /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#call_pipe_operator> + Call { function: Function, alias: Option<Ident> }, } impl fmt::Display for PipeOperator { @@ -2841,6 +2847,13 @@ impl fmt::Display for PipeOperator { } Ok(()) } + PipeOperator::Call { function, alias } => { + write!(f, "CALL {}", function)?; + if let Some(alias) = alias { + write!(f, " AS {}", alias)?; + } + Ok(()) + } } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index dbc5a293e..2cb5ff7b0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11087,6 +11087,7 @@ impl<'a> Parser<'a> { Keyword::UNION, Keyword::INTERSECT, Keyword::EXCEPT, + Keyword::CALL, ])?; match kw { Keyword::SELECT => { @@ -11212,6 +11213,24 @@ impl<'a> Parser<'a> { })?; pipe_operators.push(PipeOperator::Except { set_quantifier, queries }); } + Keyword::CALL => { + let function_name = self.parse_object_name(false)?; + let function_expr = self.parse_function(function_name)?; + // Extract Function from Expr::Function + if let Expr::Function(function) = function_expr { + // Parse optional alias + let alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()?) 
+ } else { + None + }; + pipe_operators.push(PipeOperator::Call { function, alias }); + } else { + return Err(ParserError::ParserError( + "Expected function call after CALL".to_string() + )); + } + } unhandled => { return Err(ParserError::ParserError(format!( "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}" diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9d6a9f730..e50fd6463 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15243,6 +15243,32 @@ fn parse_pipeline_operator() { // except pipe operator with BY NAME and multiple queries dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + // call pipe operator + dialects.verified_stmt("SELECT * FROM users |> CALL my_function()"); + dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5, 'test')"); + dialects.verified_stmt("SELECT * FROM users |> CALL namespace.function_name(col1, col2, 'literal')"); + + // call pipe operator with complex arguments + dialects.verified_stmt("SELECT * FROM users |> CALL transform_data(col1 + col2)"); + dialects.verified_stmt("SELECT * FROM users |> CALL analyze_data('param1', 100, true)"); + + // call pipe operator with aliases + dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) AS al"); + dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5) AS result_table"); + dialects.verified_stmt("SELECT * FROM users |> CALL namespace.func() AS my_alias"); + + // multiple call pipe operators in sequence + dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) |> CALL tvf2(arg2, arg3)"); + dialects.verified_stmt("SELECT * FROM data |> CALL transform(col1) |> CALL validate() |> CALL process(param)"); + + // multiple call pipe operators with aliases + dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) AS step1 |> CALL tvf2(arg2) AS step2"); + dialects.verified_stmt("SELECT * FROM 
data |> CALL preprocess() AS clean_data |> CALL analyze(mode) AS results"); + + // call pipe operators mixed with other pipe operators + dialects.verified_stmt("SELECT * FROM users |> CALL transform() |> WHERE status = 'active' |> CALL process(param)"); + dialects.verified_stmt("SELECT * FROM data |> CALL preprocess() AS clean |> SELECT col1, col2 |> CALL validate()"); + // many pipes dialects.verified_stmt( "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", @@ -15300,6 +15326,31 @@ fn parse_pipeline_operator_negative_tests() { ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), dialects.parse_sql_statements("SELECT * FROM users |> INTERSECT ALL BY NAME (SELECT * FROM admins)").unwrap_err() ); + + // Test that CALL without function name fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> CALL").is_err() + ); + + // Test that CALL without parentheses fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> CALL my_function").is_err() + ); + + // Test that CALL with invalid function syntax fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> CALL 123invalid").is_err() + ); + + // Test that CALL with malformed arguments fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> CALL my_function(,)").is_err() + ); + + // Test that CALL with invalid alias syntax fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> CALL my_function() AS").is_err() + ); } #[test] From 140d723b832446caf26956f9ffadfc6a2bdc01af Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 5 Jun 2025 21:41:30 +0200 Subject: [PATCH 07/20] impl pivot --- src/ast/query.rs | 29 +++++++++++++++++++++ src/parser/mod.rs | 46 
+++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 53 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index 1816094d8..c1f0450ef 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2723,6 +2723,17 @@ pub enum PipeOperator { /// /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#call_pipe_operator> Call { function: Function, alias: Option<Ident> }, + /// Pivots data from rows to columns. + /// + /// Syntax: `|> PIVOT(aggregate_function(column) FOR pivot_column IN (value1, value2, ...)) [AS alias]` + /// + /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#pivot_pipe_operator> + Pivot { + aggregate_functions: Vec<ExprWithAlias>, + value_column: Vec<Ident>, + value_source: PivotValueSource, + alias: Option<Ident>, + }, } impl fmt::Display for PipeOperator { @@ -2854,6 +2865,24 @@ impl fmt::Display for PipeOperator { } Ok(()) } + PipeOperator::Pivot { + aggregate_functions, + value_column, + value_source, + alias, + } => { + write!( + f, + "PIVOT({} FOR {} IN ({}))", + display_comma_separated(aggregate_functions), + Expr::CompoundIdentifier(value_column.to_vec()), + value_source + )?; + if let Some(alias) = alias { + write!(f, " AS {}", alias)?; + } + Ok(()) + } } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2cb5ff7b0..6659127ad 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11088,6 +11088,7 @@ impl<'a> Parser<'a> { Keyword::INTERSECT, Keyword::EXCEPT, Keyword::CALL, + Keyword::PIVOT, ])?; match kw { Keyword::SELECT => { @@ -11231,6 +11232,51 @@ impl<'a> Parser<'a> { )); } } + Keyword::PIVOT => { + self.expect_token(&Token::LParen)?; + let aggregate_functions = self.parse_comma_separated(Self::parse_aliased_function_call)?; + self.expect_keyword_is(Keyword::FOR)?; + let value_column = self.parse_period_separated(|p| p.parse_identifier())?; + self.expect_keyword_is(Keyword::IN)?; + + self.expect_token(&Token::LParen)?; + let value_source = if self.parse_keyword(Keyword::ANY) { + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + 
self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + PivotValueSource::Any(order_by) + } else if self.peek_sub_query() { + PivotValueSource::Subquery(self.parse_query()?) + } else { + PivotValueSource::List(self.parse_comma_separated(Self::parse_expr_with_alias)?) + }; + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen)?; + + // Parse optional alias (with or without AS keyword) + let alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()?) + } else { + // Check if the next token is an identifier (implicit alias) + let checkpoint = self.index; + match self.parse_identifier() { + Ok(ident) => Some(ident), + Err(_) => { + self.index = checkpoint; // Rewind on failure + None + } + } + }; + + pipe_operators.push(PipeOperator::Pivot { + aggregate_functions, + value_column, + value_source, + alias, + }); + } unhandled => { return Err(ParserError::ParserError(format!( "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}" diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e50fd6463..345ebd69f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15269,6 +15269,34 @@ fn parse_pipeline_operator() { dialects.verified_stmt("SELECT * FROM users |> CALL transform() |> WHERE status = 'active' |> CALL process(param)"); dialects.verified_stmt("SELECT * FROM data |> CALL preprocess() AS clean |> SELECT col1, col2 |> CALL validate()"); + // pivot pipe operator + dialects.verified_stmt("SELECT * FROM monthly_sales |> PIVOT(SUM(amount) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))"); + dialects.verified_stmt("SELECT * FROM sales_data |> PIVOT(AVG(revenue) FOR region IN ('North', 'South', 'East', 'West'))"); + + // pivot pipe operator with multiple aggregate functions + dialects.verified_stmt("SELECT * FROM data |> PIVOT(SUM(sales) AS total_sales, COUNT(*) AS num_transactions FOR month IN ('Jan', 'Feb', 'Mar'))"); + + // pivot pipe operator with 
compound column names + dialects.verified_stmt("SELECT * FROM sales |> PIVOT(SUM(amount) FOR product.category IN ('Electronics', 'Clothing'))"); + + // pivot pipe operator mixed with other pipe operators + dialects.verified_stmt("SELECT * FROM sales_data |> WHERE year = 2023 |> PIVOT(SUM(revenue) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))"); + + // pivot pipe operator with aliases + dialects.verified_stmt("SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales"); + dialects.verified_stmt("SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category"); + dialects.verified_stmt("SELECT * FROM sales |> PIVOT(COUNT(*) AS transactions, SUM(amount) AS total FOR region IN ('North', 'South')) AS regional_summary"); + + // pivot pipe operator with implicit aliases (without AS keyword) + dialects.verified_query_with_canonical( + "SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) quarterly_sales", + "SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales", + ); + dialects.verified_query_with_canonical( + "SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) avg_by_category", + "SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category", + ); + // many pipes dialects.verified_stmt( "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", @@ -15351,6 +15379,31 @@ fn parse_pipeline_operator_negative_tests() { assert!( dialects.parse_sql_statements("SELECT * FROM users |> CALL my_function() AS").is_err() ); + + // Test that PIVOT without parentheses fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> PIVOT SUM(amount) FOR month IN ('Jan')").is_err() + ); + + // 
Test that PIVOT without FOR keyword fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) month IN ('Jan'))").is_err() + ); + + // Test that PIVOT without IN keyword fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month ('Jan'))").is_err() + ); + + // Test that PIVOT with empty IN list fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month IN ())").is_err() + ); + + // Test that PIVOT with invalid alias syntax fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month IN ('Jan')) AS").is_err() + ); } #[test] From eaf7c8ef43d04cc681e587b2b088d9195508f72d Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 5 Jun 2025 21:47:20 +0200 Subject: [PATCH 08/20] unpivot --- src/ast/query.rs | 32 ++++++++++++++++++ src/parser/mod.rs | 46 ++++++++++++++++++++++++++ tests/sqlparser_common.rs | 69 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index c1f0450ef..fa42cc84b 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2734,6 +2734,20 @@ pub enum PipeOperator { value_source: PivotValueSource, alias: Option, }, + /// The `UNPIVOT` pipe operator transforms columns into rows. 
+ /// + /// Syntax: + /// ```sql + /// |> UNPIVOT(value_column FOR name_column IN (column1, column2, ...)) [alias] + /// ``` + /// + /// See more at + Unpivot { + value_column: Ident, + name_column: Ident, + unpivot_columns: Vec, + alias: Option, + }, } impl fmt::Display for PipeOperator { @@ -2883,6 +2897,24 @@ impl fmt::Display for PipeOperator { } Ok(()) } + PipeOperator::Unpivot { + value_column, + name_column, + unpivot_columns, + alias, + } => { + write!( + f, + "UNPIVOT({} FOR {} IN ({}))", + value_column, + name_column, + display_comma_separated(unpivot_columns) + )?; + if let Some(alias) = alias { + write!(f, " AS {}", alias)?; + } + Ok(()) + } } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6659127ad..9ee7c5782 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11089,6 +11089,7 @@ impl<'a> Parser<'a> { Keyword::EXCEPT, Keyword::CALL, Keyword::PIVOT, + Keyword::UNPIVOT, ])?; match kw { Keyword::SELECT => { @@ -11277,6 +11278,51 @@ impl<'a> Parser<'a> { alias, }); } + Keyword::UNPIVOT => { + // Parse UNPIVOT(value_column FOR name_column IN (column1, column2, ...)) [alias] + self.expect_token(&Token::LParen)?; + + // Parse value_column + let value_column = self.parse_identifier()?; + + // Parse FOR keyword + self.expect_keyword(Keyword::FOR)?; + + // Parse name_column + let name_column = self.parse_identifier()?; + + // Parse IN keyword + self.expect_keyword(Keyword::IN)?; + + // Parse (column1, column2, ...) + self.expect_token(&Token::LParen)?; + let unpivot_columns = self.parse_comma_separated(Parser::parse_identifier)?; + self.expect_token(&Token::RParen)?; + + self.expect_token(&Token::RParen)?; + + // Parse optional alias (with or without AS keyword) + let alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()?) 
+ } else { + // Check if the next token is an identifier (implicit alias) + let checkpoint = self.index; + match self.parse_identifier() { + Ok(ident) => Some(ident), + Err(_) => { + self.index = checkpoint; // Rewind on failure + None + } + } + }; + + pipe_operators.push(PipeOperator::Unpivot { + value_column, + name_column, + unpivot_columns, + alias, + }); + } unhandled => { return Err(ParserError::ParserError(format!( "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}" diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 345ebd69f..020df61c7 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15297,6 +15297,33 @@ fn parse_pipeline_operator() { "SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category", ); + // unpivot pipe operator basic usage + dialects.verified_stmt("SELECT * FROM sales |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); + dialects.verified_stmt("SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C))"); + dialects.verified_stmt("SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN (cpu, memory, disk))"); + + // unpivot pipe operator with multiple columns + dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (jan, feb, mar, apr, may, jun))"); + dialects.verified_stmt("SELECT * FROM report |> UNPIVOT(score FOR subject IN (math, science, english, history))"); + + // unpivot pipe operator mixed with other pipe operators + dialects.verified_stmt("SELECT * FROM sales_data |> WHERE year = 2023 |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); + + // unpivot pipe operator with aliases + dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales"); + dialects.verified_stmt("SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data"); + dialects.verified_stmt("SELECT * FROM metrics |> UNPIVOT(measurement FOR 
metric_type IN (cpu, memory)) AS metric_measurements"); + + // unpivot pipe operator with implicit aliases (without AS keyword) + dialects.verified_query_with_canonical( + "SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) unpivoted_sales", + "SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales", + ); + dialects.verified_query_with_canonical( + "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) transformed_data", + "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", + ); + // many pipes dialects.verified_stmt( "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", @@ -15404,6 +15431,48 @@ fn parse_pipeline_operator_negative_tests() { assert!( dialects.parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month IN ('Jan')) AS").is_err() ); + + // Test UNPIVOT negative cases + + // Test that UNPIVOT without parentheses fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT value FOR name IN col1, col2").is_err() + ); + + // Test that UNPIVOT without FOR keyword fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value name IN (col1, col2))").is_err() + ); + + // Test that UNPIVOT without IN keyword fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name (col1, col2))").is_err() + ); + + // Test that UNPIVOT with missing value column fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(FOR name IN (col1, col2))").is_err() + ); + + // Test that UNPIVOT with missing name column fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR IN (col1, col2))").is_err() + ); + + // Test that 
UNPIVOT with empty IN list fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN ())").is_err() + ); + + // Test that UNPIVOT with invalid alias syntax fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN (col1, col2)) AS").is_err() + ); + + // Test that UNPIVOT with missing closing parenthesis fails + assert!( + dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN (col1, col2)").is_err() + ); } #[test] From 4478d6b1d3c53c4337b44aa256b69a6f917f08ae Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 5 Jun 2025 21:47:35 +0200 Subject: [PATCH 09/20] fmt --- src/ast/query.rs | 5 +- src/parser/mod.rs | 77 ++++++----- tests/sqlparser_common.rs | 266 +++++++++++++++++++++++--------------- 3 files changed, 209 insertions(+), 139 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index fa42cc84b..68d523090 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2722,7 +2722,10 @@ pub enum PipeOperator { /// Syntax: `|> CALL function_name(args) [AS alias]` /// /// See more at - Call { function: Function, alias: Option }, + Call { + function: Function, + alias: Option, + }, /// Pivots data from rows to columns. 
/// /// Syntax: `|> PIVOT(aggregate_function(column) FOR pivot_column IN (value1, value2, ...)) [AS alias]` diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9ee7c5782..bafd7a385 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11157,7 +11157,8 @@ impl<'a> Parser<'a> { pipe_operators.push(PipeOperator::TableSample { sample }); } Keyword::RENAME => { - let mappings = self.parse_comma_separated(Parser::parse_identifier_with_optional_alias)?; + let mappings = + self.parse_comma_separated(Parser::parse_identifier_with_optional_alias)?; pipe_operators.push(PipeOperator::Rename { mappings }); } Keyword::UNION => { @@ -11171,19 +11172,23 @@ impl<'a> Parser<'a> { parser.expect_token(&Token::RParen)?; Ok(query) })?; - pipe_operators.push(PipeOperator::Union { set_quantifier, queries }); + pipe_operators.push(PipeOperator::Union { + set_quantifier, + queries, + }); } Keyword::INTERSECT => { // BigQuery INTERSECT pipe operator requires DISTINCT modifier - let set_quantifier = if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { - SetQuantifier::DistinctByName - } else if self.parse_keyword(Keyword::DISTINCT) { - SetQuantifier::Distinct - } else { - return Err(ParserError::ParserError( - "INTERSECT pipe operator requires DISTINCT modifier".to_string() - )); - }; + let set_quantifier = + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + SetQuantifier::DistinctByName + } else if self.parse_keyword(Keyword::DISTINCT) { + SetQuantifier::Distinct + } else { + return Err(ParserError::ParserError( + "INTERSECT pipe operator requires DISTINCT modifier".to_string(), + )); + }; // BigQuery INTERSECT pipe operator requires parentheses around queries // Parse comma-separated list of parenthesized queries let queries = self.parse_comma_separated(|parser| { @@ -11192,19 +11197,23 @@ impl<'a> Parser<'a> { parser.expect_token(&Token::RParen)?; Ok(query) })?; - pipe_operators.push(PipeOperator::Intersect { set_quantifier, 
queries }); + pipe_operators.push(PipeOperator::Intersect { + set_quantifier, + queries, + }); } Keyword::EXCEPT => { // BigQuery EXCEPT pipe operator requires DISTINCT modifier - let set_quantifier = if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { - SetQuantifier::DistinctByName - } else if self.parse_keyword(Keyword::DISTINCT) { - SetQuantifier::Distinct - } else { - return Err(ParserError::ParserError( - "EXCEPT pipe operator requires DISTINCT modifier".to_string() - )); - }; + let set_quantifier = + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + SetQuantifier::DistinctByName + } else if self.parse_keyword(Keyword::DISTINCT) { + SetQuantifier::Distinct + } else { + return Err(ParserError::ParserError( + "EXCEPT pipe operator requires DISTINCT modifier".to_string(), + )); + }; // BigQuery EXCEPT pipe operator requires parentheses around queries // Parse comma-separated list of parenthesized queries let queries = self.parse_comma_separated(|parser| { @@ -11213,7 +11222,10 @@ impl<'a> Parser<'a> { parser.expect_token(&Token::RParen)?; Ok(query) })?; - pipe_operators.push(PipeOperator::Except { set_quantifier, queries }); + pipe_operators.push(PipeOperator::Except { + set_quantifier, + queries, + }); } Keyword::CALL => { let function_name = self.parse_object_name(false)?; @@ -11229,13 +11241,14 @@ impl<'a> Parser<'a> { pipe_operators.push(PipeOperator::Call { function, alias }); } else { return Err(ParserError::ParserError( - "Expected function call after CALL".to_string() + "Expected function call after CALL".to_string(), )); } } Keyword::PIVOT => { self.expect_token(&Token::LParen)?; - let aggregate_functions = self.parse_comma_separated(Self::parse_aliased_function_call)?; + let aggregate_functions = + self.parse_comma_separated(Self::parse_aliased_function_call)?; self.expect_keyword_is(Keyword::FOR)?; let value_column = self.parse_period_separated(|p| p.parse_identifier())?; 
self.expect_keyword_is(Keyword::IN)?; @@ -11251,7 +11264,9 @@ impl<'a> Parser<'a> { } else if self.peek_sub_query() { PivotValueSource::Subquery(self.parse_query()?) } else { - PivotValueSource::List(self.parse_comma_separated(Self::parse_expr_with_alias)?) + PivotValueSource::List( + self.parse_comma_separated(Self::parse_expr_with_alias)?, + ) }; self.expect_token(&Token::RParen)?; self.expect_token(&Token::RParen)?; @@ -11281,24 +11296,24 @@ impl<'a> Parser<'a> { Keyword::UNPIVOT => { // Parse UNPIVOT(value_column FOR name_column IN (column1, column2, ...)) [alias] self.expect_token(&Token::LParen)?; - + // Parse value_column let value_column = self.parse_identifier()?; - + // Parse FOR keyword self.expect_keyword(Keyword::FOR)?; - + // Parse name_column let name_column = self.parse_identifier()?; - + // Parse IN keyword self.expect_keyword(Keyword::IN)?; - + // Parse (column1, column2, ...) self.expect_token(&Token::LParen)?; let unpivot_columns = self.parse_comma_separated(Parser::parse_identifier)?; self.expect_token(&Token::RParen)?; - + self.expect_token(&Token::RParen)?; // Parse optional alias (with or without AS keyword) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 020df61c7..3ca8ca11d 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15205,88 +15205,113 @@ fn parse_pipeline_operator() { dialects.verified_stmt("SELECT * FROM users |> UNION ALL (SELECT * FROM admins)"); dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT (SELECT * FROM admins)"); dialects.verified_stmt("SELECT * FROM users |> UNION (SELECT * FROM admins)"); - + // union pipe operator with multiple queries - dialects.verified_stmt("SELECT * FROM users |> UNION ALL (SELECT * FROM admins), (SELECT * FROM guests)"); + dialects.verified_stmt( + "SELECT * FROM users |> UNION ALL (SELECT * FROM admins), (SELECT * FROM guests)", + ); dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT (SELECT * FROM admins), (SELECT * FROM 
guests), (SELECT * FROM employees)"); - dialects.verified_stmt("SELECT * FROM users |> UNION (SELECT * FROM admins), (SELECT * FROM guests)"); - + dialects.verified_stmt( + "SELECT * FROM users |> UNION (SELECT * FROM admins), (SELECT * FROM guests)", + ); + // union pipe operator with BY NAME modifier dialects.verified_stmt("SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins)"); dialects.verified_stmt("SELECT * FROM users |> UNION ALL BY NAME (SELECT * FROM admins)"); dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT BY NAME (SELECT * FROM admins)"); - + // union pipe operator with BY NAME and multiple queries - dialects.verified_stmt("SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + dialects.verified_stmt( + "SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins), (SELECT * FROM guests)", + ); // intersect pipe operator (BigQuery requires DISTINCT modifier for INTERSECT) dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins)"); - + // intersect pipe operator with BY NAME modifier - dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins)"); - + dialects + .verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins)"); + // intersect pipe operator with multiple queries - dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)"); - + dialects.verified_stmt( + "SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", + ); + // intersect pipe operator with BY NAME and multiple queries dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); // except pipe operator (BigQuery requires DISTINCT modifier for EXCEPT) dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins)"); - - // except pipe operator with BY NAME modifier 
+ + // except pipe operator with BY NAME modifier dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins)"); - + // except pipe operator with multiple queries - dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)"); - + dialects.verified_stmt( + "SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", + ); + // except pipe operator with BY NAME and multiple queries dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); // call pipe operator dialects.verified_stmt("SELECT * FROM users |> CALL my_function()"); dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5, 'test')"); - dialects.verified_stmt("SELECT * FROM users |> CALL namespace.function_name(col1, col2, 'literal')"); - + dialects.verified_stmt( + "SELECT * FROM users |> CALL namespace.function_name(col1, col2, 'literal')", + ); + // call pipe operator with complex arguments dialects.verified_stmt("SELECT * FROM users |> CALL transform_data(col1 + col2)"); dialects.verified_stmt("SELECT * FROM users |> CALL analyze_data('param1', 100, true)"); - + // call pipe operator with aliases dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) AS al"); dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5) AS result_table"); dialects.verified_stmt("SELECT * FROM users |> CALL namespace.func() AS my_alias"); - + // multiple call pipe operators in sequence dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) |> CALL tvf2(arg2, arg3)"); - dialects.verified_stmt("SELECT * FROM data |> CALL transform(col1) |> CALL validate() |> CALL process(param)"); - + dialects.verified_stmt( + "SELECT * FROM data |> CALL transform(col1) |> CALL validate() |> CALL process(param)", + ); + // multiple call pipe operators with aliases - dialects.verified_stmt("SELECT * FROM input_table |> CALL 
tvf1(arg1) AS step1 |> CALL tvf2(arg2) AS step2"); - dialects.verified_stmt("SELECT * FROM data |> CALL preprocess() AS clean_data |> CALL analyze(mode) AS results"); - + dialects.verified_stmt( + "SELECT * FROM input_table |> CALL tvf1(arg1) AS step1 |> CALL tvf2(arg2) AS step2", + ); + dialects.verified_stmt( + "SELECT * FROM data |> CALL preprocess() AS clean_data |> CALL analyze(mode) AS results", + ); + // call pipe operators mixed with other pipe operators - dialects.verified_stmt("SELECT * FROM users |> CALL transform() |> WHERE status = 'active' |> CALL process(param)"); - dialects.verified_stmt("SELECT * FROM data |> CALL preprocess() AS clean |> SELECT col1, col2 |> CALL validate()"); + dialects.verified_stmt( + "SELECT * FROM users |> CALL transform() |> WHERE status = 'active' |> CALL process(param)", + ); + dialects.verified_stmt( + "SELECT * FROM data |> CALL preprocess() AS clean |> SELECT col1, col2 |> CALL validate()", + ); // pivot pipe operator - dialects.verified_stmt("SELECT * FROM monthly_sales |> PIVOT(SUM(amount) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))"); + dialects.verified_stmt( + "SELECT * FROM monthly_sales |> PIVOT(SUM(amount) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))", + ); dialects.verified_stmt("SELECT * FROM sales_data |> PIVOT(AVG(revenue) FOR region IN ('North', 'South', 'East', 'West'))"); - + // pivot pipe operator with multiple aggregate functions dialects.verified_stmt("SELECT * FROM data |> PIVOT(SUM(sales) AS total_sales, COUNT(*) AS num_transactions FOR month IN ('Jan', 'Feb', 'Mar'))"); - + // pivot pipe operator with compound column names dialects.verified_stmt("SELECT * FROM sales |> PIVOT(SUM(amount) FOR product.category IN ('Electronics', 'Clothing'))"); - + // pivot pipe operator mixed with other pipe operators dialects.verified_stmt("SELECT * FROM sales_data |> WHERE year = 2023 |> PIVOT(SUM(revenue) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))"); - + // pivot pipe operator with aliases dialects.verified_stmt("SELECT * 
FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales"); dialects.verified_stmt("SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category"); dialects.verified_stmt("SELECT * FROM sales |> PIVOT(COUNT(*) AS transactions, SUM(amount) AS total FOR region IN ('North', 'South')) AS regional_summary"); - + // pivot pipe operator with implicit aliases (without AS keyword) dialects.verified_query_with_canonical( "SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) quarterly_sales", @@ -15298,22 +15323,29 @@ fn parse_pipeline_operator() { ); // unpivot pipe operator basic usage - dialects.verified_stmt("SELECT * FROM sales |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); + dialects + .verified_stmt("SELECT * FROM sales |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); dialects.verified_stmt("SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C))"); - dialects.verified_stmt("SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN (cpu, memory, disk))"); - + dialects.verified_stmt( + "SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN (cpu, memory, disk))", + ); + // unpivot pipe operator with multiple columns dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (jan, feb, mar, apr, may, jun))"); - dialects.verified_stmt("SELECT * FROM report |> UNPIVOT(score FOR subject IN (math, science, english, history))"); - + dialects.verified_stmt( + "SELECT * FROM report |> UNPIVOT(score FOR subject IN (math, science, english, history))", + ); + // unpivot pipe operator mixed with other pipe operators dialects.verified_stmt("SELECT * FROM sales_data |> WHERE year = 2023 |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); - + // unpivot pipe operator with aliases dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales"); - dialects.verified_stmt("SELECT * FROM 
data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data"); + dialects.verified_stmt( + "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", + ); dialects.verified_stmt("SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN (cpu, memory)) AS metric_measurements"); - + // unpivot pipe operator with implicit aliases (without AS keyword) dialects.verified_query_with_canonical( "SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) unpivoted_sales", @@ -15337,142 +15369,162 @@ fn parse_pipeline_operator_negative_tests() { // Test that plain EXCEPT without DISTINCT fails assert_eq!( ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), - dialects.parse_sql_statements("SELECT * FROM users |> EXCEPT (SELECT * FROM admins)").unwrap_err() + dialects + .parse_sql_statements("SELECT * FROM users |> EXCEPT (SELECT * FROM admins)") + .unwrap_err() ); - // Test that EXCEPT ALL fails + // Test that EXCEPT ALL fails assert_eq!( ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), - dialects.parse_sql_statements("SELECT * FROM users |> EXCEPT ALL (SELECT * FROM admins)").unwrap_err() + dialects + .parse_sql_statements("SELECT * FROM users |> EXCEPT ALL (SELECT * FROM admins)") + .unwrap_err() ); // Test that EXCEPT BY NAME without DISTINCT fails assert_eq!( ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), - dialects.parse_sql_statements("SELECT * FROM users |> EXCEPT BY NAME (SELECT * FROM admins)").unwrap_err() + dialects + .parse_sql_statements("SELECT * FROM users |> EXCEPT BY NAME (SELECT * FROM admins)") + .unwrap_err() ); // Test that EXCEPT ALL BY NAME fails assert_eq!( ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), - dialects.parse_sql_statements("SELECT * FROM users |> EXCEPT ALL BY NAME (SELECT * FROM admins)").unwrap_err() + dialects + 
.parse_sql_statements( + "SELECT * FROM users |> EXCEPT ALL BY NAME (SELECT * FROM admins)" + ) + .unwrap_err() ); // Test that plain INTERSECT without DISTINCT fails assert_eq!( ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), - dialects.parse_sql_statements("SELECT * FROM users |> INTERSECT (SELECT * FROM admins)").unwrap_err() + dialects + .parse_sql_statements("SELECT * FROM users |> INTERSECT (SELECT * FROM admins)") + .unwrap_err() ); - // Test that INTERSECT ALL fails + // Test that INTERSECT ALL fails assert_eq!( ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), - dialects.parse_sql_statements("SELECT * FROM users |> INTERSECT ALL (SELECT * FROM admins)").unwrap_err() + dialects + .parse_sql_statements("SELECT * FROM users |> INTERSECT ALL (SELECT * FROM admins)") + .unwrap_err() ); // Test that INTERSECT BY NAME without DISTINCT fails assert_eq!( ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), - dialects.parse_sql_statements("SELECT * FROM users |> INTERSECT BY NAME (SELECT * FROM admins)").unwrap_err() + dialects + .parse_sql_statements("SELECT * FROM users |> INTERSECT BY NAME (SELECT * FROM admins)") + .unwrap_err() ); // Test that INTERSECT ALL BY NAME fails assert_eq!( ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), - dialects.parse_sql_statements("SELECT * FROM users |> INTERSECT ALL BY NAME (SELECT * FROM admins)").unwrap_err() + dialects + .parse_sql_statements( + "SELECT * FROM users |> INTERSECT ALL BY NAME (SELECT * FROM admins)" + ) + .unwrap_err() ); // Test that CALL without function name fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> CALL").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL") + .is_err()); - // Test that CALL without parentheses fails - assert!( - dialects.parse_sql_statements("SELECT * FROM 
users |> CALL my_function").is_err() - ); + // Test that CALL without parentheses fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL my_function") + .is_err()); // Test that CALL with invalid function syntax fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> CALL 123invalid").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL 123invalid") + .is_err()); // Test that CALL with malformed arguments fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> CALL my_function(,)").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL my_function(,)") + .is_err()); // Test that CALL with invalid alias syntax fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> CALL my_function() AS").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL my_function() AS") + .is_err()); // Test that PIVOT without parentheses fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> PIVOT SUM(amount) FOR month IN ('Jan')").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT SUM(amount) FOR month IN ('Jan')") + .is_err()); // Test that PIVOT without FOR keyword fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) month IN ('Jan'))").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) month IN ('Jan'))") + .is_err()); // Test that PIVOT without IN keyword fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month ('Jan'))").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month ('Jan'))") + .is_err()); // Test that PIVOT with empty IN list fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month IN ())").is_err() - ); + assert!(dialects + 
.parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month IN ())") + .is_err()); // Test that PIVOT with invalid alias syntax fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month IN ('Jan')) AS").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month IN ('Jan')) AS") + .is_err()); // Test UNPIVOT negative cases - + // Test that UNPIVOT without parentheses fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT value FOR name IN col1, col2").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT value FOR name IN col1, col2") + .is_err()); // Test that UNPIVOT without FOR keyword fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value name IN (col1, col2))").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value name IN (col1, col2))") + .is_err()); // Test that UNPIVOT without IN keyword fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name (col1, col2))").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name (col1, col2))") + .is_err()); // Test that UNPIVOT with missing value column fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(FOR name IN (col1, col2))").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(FOR name IN (col1, col2))") + .is_err()); // Test that UNPIVOT with missing name column fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR IN (col1, col2))").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR IN (col1, col2))") + .is_err()); // Test that UNPIVOT with empty IN list fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR 
name IN ())").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN ())") + .is_err()); // Test that UNPIVOT with invalid alias syntax fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN (col1, col2)) AS").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN (col1, col2)) AS") + .is_err()); // Test that UNPIVOT with missing closing parenthesis fails - assert!( - dialects.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN (col1, col2)").is_err() - ); + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN (col1, col2)") + .is_err()); } #[test] From 34ba719bb81b8285f1dd413f22638fcf37a98e2a Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 5 Jun 2025 23:08:42 +0200 Subject: [PATCH 10/20] self review --- src/ast/query.rs | 88 +++++++++++++++++------------------------------ src/parser/mod.rs | 85 ++++++++++++++++++--------------------------- 2 files changed, 64 insertions(+), 109 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 68d523090..10dc9012e 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2812,69 +2812,15 @@ impl fmt::Display for PipeOperator { PipeOperator::Union { set_quantifier, queries, - } => { - write!(f, "UNION")?; - match set_quantifier { - SetQuantifier::All => write!(f, " ALL")?, - SetQuantifier::Distinct => write!(f, " DISTINCT")?, - SetQuantifier::None => {} - _ => { - write!(f, " {}", set_quantifier)?; - } - } - write!(f, " ")?; - for (i, query) in queries.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "({})", query)?; - } - Ok(()) - } + } => Self::fmt_set_operation(f, "UNION", set_quantifier, queries), PipeOperator::Intersect { set_quantifier, queries, - } => { - write!(f, "INTERSECT")?; - match set_quantifier { - SetQuantifier::All => write!(f, " ALL")?, - 
SetQuantifier::Distinct => write!(f, " DISTINCT")?, - SetQuantifier::None => {} - _ => { - write!(f, " {}", set_quantifier)?; - } - } - write!(f, " ")?; - for (i, query) in queries.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "({})", query)?; - } - Ok(()) - } + } => Self::fmt_set_operation(f, "INTERSECT", set_quantifier, queries), PipeOperator::Except { set_quantifier, queries, - } => { - write!(f, "EXCEPT")?; - match set_quantifier { - SetQuantifier::All => write!(f, " ALL")?, - SetQuantifier::Distinct => write!(f, " DISTINCT")?, - SetQuantifier::None => {} - _ => { - write!(f, " {}", set_quantifier)?; - } - } - write!(f, " ")?; - for (i, query) in queries.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "({})", query)?; - } - Ok(()) - } + } => Self::fmt_set_operation(f, "EXCEPT", set_quantifier, queries), PipeOperator::Call { function, alias } => { write!(f, "CALL {}", function)?; if let Some(alias) = alias { @@ -2922,6 +2868,34 @@ impl fmt::Display for PipeOperator { } } +impl PipeOperator { + /// Helper function to format set operations (UNION, INTERSECT, EXCEPT) with queries + fn fmt_set_operation( + f: &mut fmt::Formatter<'_>, + operation: &str, + set_quantifier: &SetQuantifier, + queries: &[Box], + ) -> fmt::Result { + write!(f, "{}", operation)?; + match set_quantifier { + SetQuantifier::All => write!(f, " ALL")?, + SetQuantifier::Distinct => write!(f, " DISTINCT")?, + SetQuantifier::None => {} + _ => { + write!(f, " {}", set_quantifier)?; + } + } + write!(f, " ")?; + for (i, query) in queries.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "({})", query)?; + } + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bafd7a385..c88dc8f68 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs 
@@ -9947,6 +9947,7 @@ impl<'a> Parser<'a> { Ok(IdentWithAlias { ident, alias }) } + /// Parse `identifier [AS] identifier` where the AS keyword is optional pub fn parse_identifier_with_optional_alias(&mut self) -> Result { let ident = self.parse_identifier()?; let _after_as = self.parse_keyword(Keyword::AS); @@ -9954,6 +9955,33 @@ impl<'a> Parser<'a> { Ok(IdentWithAlias { ident, alias }) } + /// Parse comma-separated list of parenthesized queries for pipe operators + fn parse_pipe_operator_queries(&mut self) -> Result>, ParserError> { + self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let query = parser.parse_query()?; + parser.expect_token(&Token::RParen)?; + Ok(query) + }) + } + + /// Parse optional alias (with or without AS keyword) for pipe operators + fn parse_optional_pipe_alias(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()).transpose() + } else { + // Check if the next token is an identifier (implicit alias) + let checkpoint = self.index; + match self.parse_identifier() { + Ok(ident) => Ok(Some(ident)), + Err(_) => { + self.index = checkpoint; // Rewind on failure + Ok(None) + } + } + } + } + /// Optionally parses an alias for a select list item fn maybe_parse_select_item_alias(&mut self) -> Result, ParserError> { fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { @@ -11164,14 +11192,7 @@ impl<'a> Parser<'a> { Keyword::UNION => { // Reuse existing set quantifier parser for consistent BY NAME support let set_quantifier = self.parse_set_quantifier(&Some(SetOperator::Union)); - // BigQuery UNION pipe operator requires parentheses around queries - // Parse comma-separated list of parenthesized queries - let queries = self.parse_comma_separated(|parser| { - parser.expect_token(&Token::LParen)?; - let query = parser.parse_query()?; - parser.expect_token(&Token::RParen)?; - Ok(query) - })?; + let queries = self.parse_pipe_operator_queries()?; 
pipe_operators.push(PipeOperator::Union { set_quantifier, queries, @@ -11189,14 +11210,7 @@ impl<'a> Parser<'a> { "INTERSECT pipe operator requires DISTINCT modifier".to_string(), )); }; - // BigQuery INTERSECT pipe operator requires parentheses around queries - // Parse comma-separated list of parenthesized queries - let queries = self.parse_comma_separated(|parser| { - parser.expect_token(&Token::LParen)?; - let query = parser.parse_query()?; - parser.expect_token(&Token::RParen)?; - Ok(query) - })?; + let queries = self.parse_pipe_operator_queries()?; pipe_operators.push(PipeOperator::Intersect { set_quantifier, queries, @@ -11214,14 +11228,7 @@ impl<'a> Parser<'a> { "EXCEPT pipe operator requires DISTINCT modifier".to_string(), )); }; - // BigQuery EXCEPT pipe operator requires parentheses around queries - // Parse comma-separated list of parenthesized queries - let queries = self.parse_comma_separated(|parser| { - parser.expect_token(&Token::LParen)?; - let query = parser.parse_query()?; - parser.expect_token(&Token::RParen)?; - Ok(query) - })?; + let queries = self.parse_pipe_operator_queries()?; pipe_operators.push(PipeOperator::Except { set_quantifier, queries, @@ -11271,20 +11278,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; self.expect_token(&Token::RParen)?; - // Parse optional alias (with or without AS keyword) - let alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier()?) 
- } else { - // Check if the next token is an identifier (implicit alias) - let checkpoint = self.index; - match self.parse_identifier() { - Ok(ident) => Some(ident), - Err(_) => { - self.index = checkpoint; // Rewind on failure - None - } - } - }; + let alias = self.parse_optional_pipe_alias()?; pipe_operators.push(PipeOperator::Pivot { aggregate_functions, @@ -11316,20 +11310,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; - // Parse optional alias (with or without AS keyword) - let alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier()?) - } else { - // Check if the next token is an identifier (implicit alias) - let checkpoint = self.index; - match self.parse_identifier() { - Ok(ident) => Some(ident), - Err(_) => { - self.index = checkpoint; // Rewind on failure - None - } - } - }; + let alias = self.parse_optional_pipe_alias()?; pipe_operators.push(PipeOperator::Unpivot { value_column, From f62333ec7b96ba1714b258d9d08bb511a91a3e17 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Fri, 6 Jun 2025 08:04:35 +0200 Subject: [PATCH 11/20] reduce duplication --- src/ast/query.rs | 23 +++++++++++----------- src/parser/mod.rs | 49 +++++++++++++++++++---------------------------- 2 files changed, 31 insertions(+), 41 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 10dc9012e..2360d7701 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2823,10 +2823,7 @@ impl fmt::Display for PipeOperator { } => Self::fmt_set_operation(f, "EXCEPT", set_quantifier, queries), PipeOperator::Call { function, alias } => { write!(f, "CALL {}", function)?; - if let Some(alias) = alias { - write!(f, " AS {}", alias)?; - } - Ok(()) + Self::fmt_optional_alias(f, alias) } PipeOperator::Pivot { aggregate_functions, @@ -2841,10 +2838,7 @@ impl fmt::Display for PipeOperator { Expr::CompoundIdentifier(value_column.to_vec()), value_source )?; - if let Some(alias) = alias { - write!(f, " AS {}", alias)?; - } - Ok(()) + 
Self::fmt_optional_alias(f, alias) } PipeOperator::Unpivot { value_column, @@ -2859,16 +2853,21 @@ impl fmt::Display for PipeOperator { name_column, display_comma_separated(unpivot_columns) )?; - if let Some(alias) = alias { - write!(f, " AS {}", alias)?; - } - Ok(()) + Self::fmt_optional_alias(f, alias) } } } } impl PipeOperator { + /// Helper function to format optional alias for pipe operators + fn fmt_optional_alias(f: &mut fmt::Formatter<'_>, alias: &Option) -> fmt::Result { + if let Some(alias) = alias { + write!(f, " AS {}", alias)?; + } + Ok(()) + } + /// Helper function to format set operations (UNION, INTERSECT, EXCEPT) with queries fn fmt_set_operation( f: &mut fmt::Formatter<'_>, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c88dc8f68..4c9e53ccf 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -195,6 +195,9 @@ const EOF_TOKEN: TokenWithSpan = TokenWithSpan { }, }; +// Error message constant for pipe operators that require DISTINCT +const EXPECTED_FUNCTION_CALL_MSG: &str = "Expected function call after CALL"; + /// Composite types declarations using angle brackets syntax can be arbitrary /// nested such that the following declaration is possible: /// `ARRAY>` @@ -9965,6 +9968,19 @@ impl<'a> Parser<'a> { }) } + /// Parse set quantifier for pipe operators that require DISTINCT (INTERSECT, EXCEPT) + fn parse_distinct_required_set_quantifier(&mut self, operator_name: &str) -> Result { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + Ok(SetQuantifier::DistinctByName) + } else if self.parse_keyword(Keyword::DISTINCT) { + Ok(SetQuantifier::Distinct) + } else { + Err(ParserError::ParserError( + format!("{} pipe operator requires DISTINCT modifier", operator_name), + )) + } + } + /// Parse optional alias (with or without AS keyword) for pipe operators fn parse_optional_pipe_alias(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::AS) { @@ -11199,17 +11215,7 @@ impl<'a> Parser<'a> { }); } 
Keyword::INTERSECT => { - // BigQuery INTERSECT pipe operator requires DISTINCT modifier - let set_quantifier = - if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { - SetQuantifier::DistinctByName - } else if self.parse_keyword(Keyword::DISTINCT) { - SetQuantifier::Distinct - } else { - return Err(ParserError::ParserError( - "INTERSECT pipe operator requires DISTINCT modifier".to_string(), - )); - }; + let set_quantifier = self.parse_distinct_required_set_quantifier("INTERSECT")?; let queries = self.parse_pipe_operator_queries()?; pipe_operators.push(PipeOperator::Intersect { set_quantifier, @@ -11217,17 +11223,7 @@ impl<'a> Parser<'a> { }); } Keyword::EXCEPT => { - // BigQuery EXCEPT pipe operator requires DISTINCT modifier - let set_quantifier = - if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { - SetQuantifier::DistinctByName - } else if self.parse_keyword(Keyword::DISTINCT) { - SetQuantifier::Distinct - } else { - return Err(ParserError::ParserError( - "EXCEPT pipe operator requires DISTINCT modifier".to_string(), - )); - }; + let set_quantifier = self.parse_distinct_required_set_quantifier("EXCEPT")?; let queries = self.parse_pipe_operator_queries()?; pipe_operators.push(PipeOperator::Except { set_quantifier, @@ -11239,16 +11235,11 @@ impl<'a> Parser<'a> { let function_expr = self.parse_function(function_name)?; // Extract Function from Expr::Function if let Expr::Function(function) = function_expr { - // Parse optional alias - let alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier()?) 
- } else { - None - }; + let alias = self.parse_optional_pipe_alias()?; pipe_operators.push(PipeOperator::Call { function, alias }); } else { return Err(ParserError::ParserError( - "Expected function call after CALL".to_string(), + EXPECTED_FUNCTION_CALL_MSG.to_string(), )); } } From 9f91429dc915586671c2773f15f42db8f1820f33 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Fri, 6 Jun 2025 08:07:32 +0200 Subject: [PATCH 12/20] inline constant --- src/parser/mod.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4c9e53ccf..994b29bde 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -195,9 +195,6 @@ const EOF_TOKEN: TokenWithSpan = TokenWithSpan { }, }; -// Error message constant for pipe operators that require DISTINCT -const EXPECTED_FUNCTION_CALL_MSG: &str = "Expected function call after CALL"; - /// Composite types declarations using angle brackets syntax can be arbitrary /// nested such that the following declaration is possible: /// `ARRAY>` @@ -9969,15 +9966,19 @@ impl<'a> Parser<'a> { } /// Parse set quantifier for pipe operators that require DISTINCT (INTERSECT, EXCEPT) - fn parse_distinct_required_set_quantifier(&mut self, operator_name: &str) -> Result { + fn parse_distinct_required_set_quantifier( + &mut self, + operator_name: &str, + ) -> Result { if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { Ok(SetQuantifier::DistinctByName) } else if self.parse_keyword(Keyword::DISTINCT) { Ok(SetQuantifier::Distinct) } else { - Err(ParserError::ParserError( - format!("{} pipe operator requires DISTINCT modifier", operator_name), - )) + Err(ParserError::ParserError(format!( + "{} pipe operator requires DISTINCT modifier", + operator_name + ))) } } @@ -11215,7 +11216,8 @@ impl<'a> Parser<'a> { }); } Keyword::INTERSECT => { - let set_quantifier = self.parse_distinct_required_set_quantifier("INTERSECT")?; + let set_quantifier = + 
self.parse_distinct_required_set_quantifier("INTERSECT")?; let queries = self.parse_pipe_operator_queries()?; pipe_operators.push(PipeOperator::Intersect { set_quantifier, @@ -11239,7 +11241,7 @@ impl<'a> Parser<'a> { pipe_operators.push(PipeOperator::Call { function, alias }); } else { return Err(ParserError::ParserError( - EXPECTED_FUNCTION_CALL_MSG.to_string(), + "Expected function call after CALL".to_string(), )); } } From ea5001932007bee0b8648cb303fdfc308c63880c Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Fri, 6 Jun 2025 09:02:27 +0200 Subject: [PATCH 13/20] don't box --- src/ast/query.rs | 8 ++++---- src/parser/mod.rs | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 2360d7701..ad5cf9002 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2697,7 +2697,7 @@ pub enum PipeOperator { /// See more at Union { set_quantifier: SetQuantifier, - queries: Vec>, + queries: Vec, }, /// Returns only the rows that are present in both the input table and the specified tables. /// @@ -2706,7 +2706,7 @@ pub enum PipeOperator { /// See more at Intersect { set_quantifier: SetQuantifier, - queries: Vec>, + queries: Vec, }, /// Returns only the rows that are present in the input table but not in the specified tables. /// @@ -2715,7 +2715,7 @@ pub enum PipeOperator { /// See more at Except { set_quantifier: SetQuantifier, - queries: Vec>, + queries: Vec, }, /// Calls a table function or procedure that returns a table. 
/// @@ -2873,7 +2873,7 @@ impl PipeOperator { f: &mut fmt::Formatter<'_>, operation: &str, set_quantifier: &SetQuantifier, - queries: &[Box], + queries: &[Query], ) -> fmt::Result { write!(f, "{}", operation)?; match set_quantifier { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 994b29bde..097d29fc5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9956,12 +9956,12 @@ impl<'a> Parser<'a> { } /// Parse comma-separated list of parenthesized queries for pipe operators - fn parse_pipe_operator_queries(&mut self) -> Result>, ParserError> { + fn parse_pipe_operator_queries(&mut self) -> Result, ParserError> { self.parse_comma_separated(|parser| { parser.expect_token(&Token::LParen)?; let query = parser.parse_query()?; parser.expect_token(&Token::RParen)?; - Ok(query) + Ok(*query) }) } From 87c529b8645f388e242a83d28aa2dffd6c25a539 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Fri, 6 Jun 2025 09:03:25 +0200 Subject: [PATCH 14/20] clarify --- src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 097d29fc5..638275d8c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9965,7 +9965,7 @@ impl<'a> Parser<'a> { }) } - /// Parse set quantifier for pipe operators that require DISTINCT (INTERSECT, EXCEPT) + /// Parse set quantifier for pipe operators that require DISTINCT. E.g. 
INTERSECT and EXCEPT fn parse_distinct_required_set_quantifier( &mut self, operator_name: &str, From 8c817c3bc423a4023498172ccdbafbea6b883ef1 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Fri, 6 Jun 2025 09:08:39 +0200 Subject: [PATCH 15/20] remove useless comment --- src/parser/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 638275d8c..5d7e9e4c8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11207,7 +11207,6 @@ impl<'a> Parser<'a> { pipe_operators.push(PipeOperator::Rename { mappings }); } Keyword::UNION => { - // Reuse existing set quantifier parser for consistent BY NAME support let set_quantifier = self.parse_set_quantifier(&Some(SetOperator::Union)); let queries = self.parse_pipe_operator_queries()?; pipe_operators.push(PipeOperator::Union { @@ -11235,7 +11234,6 @@ impl<'a> Parser<'a> { Keyword::CALL => { let function_name = self.parse_object_name(false)?; let function_expr = self.parse_function(function_name)?; - // Extract Function from Expr::Function if let Expr::Function(function) = function_expr { let alias = self.parse_optional_pipe_alias()?; pipe_operators.push(PipeOperator::Call { function, alias }); From 840fbbd6cfd93acc8542b031d8fd2175dc0fc41a Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Fri, 6 Jun 2025 09:20:56 +0200 Subject: [PATCH 16/20] use maybe_parse --- src/parser/mod.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5d7e9e4c8..572e5b3c5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9988,14 +9988,7 @@ impl<'a> Parser<'a> { Some(self.parse_identifier()).transpose() } else { // Check if the next token is an identifier (implicit alias) - let checkpoint = self.index; - match self.parse_identifier() { - Ok(ident) => Ok(Some(ident)), - Err(_) => { - self.index = checkpoint; // Rewind on failure - Ok(None) - } - } + self.maybe_parse(|parser| parser.parse_identifier()) } } From 
e1bbbd48ff9ffe90607cef09d266fb923ed79b41 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Fri, 6 Jun 2025 09:23:38 +0200 Subject: [PATCH 17/20] cleanup --- src/parser/mod.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 572e5b3c5..a5de30c97 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11272,22 +11272,12 @@ impl<'a> Parser<'a> { }); } Keyword::UNPIVOT => { - // Parse UNPIVOT(value_column FOR name_column IN (column1, column2, ...)) [alias] self.expect_token(&Token::LParen)?; - - // Parse value_column let value_column = self.parse_identifier()?; - - // Parse FOR keyword self.expect_keyword(Keyword::FOR)?; - - // Parse name_column let name_column = self.parse_identifier()?; - - // Parse IN keyword self.expect_keyword(Keyword::IN)?; - // Parse (column1, column2, ...) self.expect_token(&Token::LParen)?; let unpivot_columns = self.parse_comma_separated(Parser::parse_identifier)?; self.expect_token(&Token::RParen)?; From e78e8d1a9dc7d9ca8108a9217980d6aa34531be5 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Fri, 6 Jun 2025 09:29:49 +0200 Subject: [PATCH 18/20] simplify --- src/ast/query.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index ad5cf9002..70010a494 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2877,8 +2877,6 @@ impl PipeOperator { ) -> fmt::Result { write!(f, "{}", operation)?; match set_quantifier { - SetQuantifier::All => write!(f, " ALL")?, - SetQuantifier::Distinct => write!(f, " DISTINCT")?, SetQuantifier::None => {} _ => { write!(f, " {}", set_quantifier)?; From 989f6dd66be665c29bea61184b07ad9fd1f3a685 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Tue, 10 Jun 2025 18:45:49 +0200 Subject: [PATCH 19/20] impl join --- src/ast/query.rs | 7 +++ src/parser/mod.rs | 110 ++++++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 87 ++++++++++++++++++++++++++++++ 3 files changed, 204 insertions(+) 
diff --git a/src/ast/query.rs b/src/ast/query.rs index 70010a494..6ee73d869 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2751,6 +2751,12 @@ pub enum PipeOperator { unpivot_columns: Vec, alias: Option, }, + /// Joins the input table with another table. + /// + /// Syntax: `|> [JOIN_TYPE] JOIN [alias] ON ` or `|> [JOIN_TYPE] JOIN
[alias] USING ()` + /// + /// See more at + Join(Join), } impl fmt::Display for PipeOperator { @@ -2855,6 +2861,7 @@ impl fmt::Display for PipeOperator { )?; Self::fmt_optional_alias(f, alias) } + PipeOperator::Join(join) => write!(f, "{}", join) } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a5de30c97..219a2d3d5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11128,6 +11128,12 @@ impl<'a> Parser<'a> { Keyword::CALL, Keyword::PIVOT, Keyword::UNPIVOT, + Keyword::JOIN, + Keyword::INNER, + Keyword::LEFT, + Keyword::RIGHT, + Keyword::FULL, + Keyword::CROSS, ])?; match kw { Keyword::SELECT => { @@ -11293,6 +11299,110 @@ impl<'a> Parser<'a> { alias, }); } + Keyword::JOIN => { + let relation = self.parse_table_factor()?; + let constraint = self.parse_join_constraint(false)?; + if matches!(constraint, JoinConstraint::None) { + return Err(ParserError::ParserError( + "JOIN in pipe syntax requires ON or USING clause".to_string(), + )); + } + let join_operator = JoinOperator::Join(constraint); + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } + Keyword::INNER => { + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + let constraint = self.parse_join_constraint(false)?; + if matches!(constraint, JoinConstraint::None) { + return Err(ParserError::ParserError( + "INNER JOIN in pipe syntax requires ON or USING clause".to_string(), + )); + } + let join_operator = JoinOperator::Inner(constraint); + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } + Keyword::LEFT => { + let outer = self.parse_keyword(Keyword::OUTER); + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + let constraint = self.parse_join_constraint(false)?; + if matches!(constraint, JoinConstraint::None) { + let join_type = if outer { "LEFT OUTER JOIN" } else { "LEFT JOIN" }; + return Err(ParserError::ParserError(format!( + 
"{} in pipe syntax requires ON or USING clause", + join_type + ))); + } + let join_operator = if outer { + JoinOperator::LeftOuter(constraint) + } else { + JoinOperator::Left(constraint) + }; + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } + Keyword::RIGHT => { + let outer = self.parse_keyword(Keyword::OUTER); + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + let constraint = self.parse_join_constraint(false)?; + if matches!(constraint, JoinConstraint::None) { + let join_type = if outer { "RIGHT OUTER JOIN" } else { "RIGHT JOIN" }; + return Err(ParserError::ParserError(format!( + "{} in pipe syntax requires ON or USING clause", + join_type + ))); + } + let join_operator = if outer { + JoinOperator::RightOuter(constraint) + } else { + JoinOperator::Right(constraint) + }; + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } + Keyword::FULL => { + let _outer = self.parse_keyword(Keyword::OUTER); + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + let constraint = self.parse_join_constraint(false)?; + if matches!(constraint, JoinConstraint::None) { + return Err(ParserError::ParserError( + "FULL JOIN in pipe syntax requires ON or USING clause".to_string(), + )); + } + let join_operator = JoinOperator::FullOuter(constraint); + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } + Keyword::CROSS => { + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + let join_operator = JoinOperator::CrossJoin; + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } unhandled => { return Err(ParserError::ParserError(format!( "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}" diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 
3ca8ca11d..38221f178 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15360,6 +15360,58 @@ fn parse_pipeline_operator() { dialects.verified_stmt( "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", ); + + // join pipe operator - INNER JOIN + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM users |> INNER JOIN orders ON users.id = orders.user_id"); + + // join pipe operator - LEFT JOIN + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM users |> LEFT OUTER JOIN orders ON users.id = orders.user_id"); + + // join pipe operator - RIGHT JOIN + dialects.verified_stmt("SELECT * FROM users |> RIGHT JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM users |> RIGHT OUTER JOIN orders ON users.id = orders.user_id"); + + // join pipe operator - FULL JOIN + dialects.verified_stmt("SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id"); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> FULL OUTER JOIN orders ON users.id = orders.user_id", + "SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id", + ); + + // join pipe operator - CROSS JOIN + dialects.verified_stmt("SELECT * FROM users |> CROSS JOIN orders"); + + // join pipe operator with USING + dialects.verified_query_with_canonical( + "SELECT * FROM users |> JOIN orders USING (user_id)", + "SELECT * FROM users |> JOIN orders USING(user_id)", + ); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> LEFT JOIN orders USING (user_id, order_date)", + "SELECT * FROM users |> LEFT JOIN orders USING(user_id, order_date)", + ); + + 
// join pipe operator with alias + dialects.verified_query_with_canonical( + "SELECT * FROM users |> JOIN orders o ON users.id = o.user_id", + "SELECT * FROM users |> JOIN orders AS o ON users.id = o.user_id", + ); + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders AS o ON users.id = o.user_id"); + + // join pipe operator with complex ON condition + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id AND orders.status = 'active'"); + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id AND orders.amount > 100"); + + // multiple join pipe operators + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> JOIN products ON orders.product_id = products.id"); + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id |> RIGHT JOIN products ON orders.product_id = products.id"); + + // join pipe operator with other pipe operators + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> WHERE orders.amount > 100"); + dialects.verified_stmt("SELECT * FROM users |> WHERE users.active = true |> LEFT JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> SELECT users.name, orders.amount"); } #[test] @@ -15525,6 +15577,41 @@ fn parse_pipeline_operator_negative_tests() { assert!(dialects .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN (col1, col2)") .is_err()); + + // Test that JOIN without table name fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN ON users.id = orders.user_id") + .is_err()); + + // Test that JOIN without ON or USING condition fails (except CROSS JOIN) + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN orders") + .is_err()); + + // Test that CROSS JOIN with ON condition fails + assert!(dialects + 
.parse_sql_statements("SELECT * FROM users |> CROSS JOIN orders ON users.id = orders.user_id") + .is_err()); + + // Test that CROSS JOIN with USING condition fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CROSS JOIN orders USING (user_id)") + .is_err()); + + // Test that JOIN with empty USING list fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN orders USING ()") + .is_err()); + + // Test that JOIN with malformed ON condition fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN orders ON") + .is_err()); + + // Test that JOIN with invalid USING syntax fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN orders USING user_id") + .is_err()); } #[test] From 552def1f9ec99919222e41e18a8a517eaf582b93 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Tue, 10 Jun 2025 18:50:25 +0200 Subject: [PATCH 20/20] fmt --- src/ast/query.rs | 2 +- src/parser/mod.rs | 12 ++++++++++-- tests/sqlparser_common.rs | 30 ++++++++++++++++++------------ 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 6ee73d869..2139b88c2 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2861,7 +2861,7 @@ impl fmt::Display for PipeOperator { )?; Self::fmt_optional_alias(f, alias) } - PipeOperator::Join(join) => write!(f, "{}", join) + PipeOperator::Join(join) => write!(f, "{}", join), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 219a2d3d5..5b4260930 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11336,7 +11336,11 @@ impl<'a> Parser<'a> { let relation = self.parse_table_factor()?; let constraint = self.parse_join_constraint(false)?; if matches!(constraint, JoinConstraint::None) { - let join_type = if outer { "LEFT OUTER JOIN" } else { "LEFT JOIN" }; + let join_type = if outer { + "LEFT OUTER JOIN" + } else { + "LEFT JOIN" + }; return Err(ParserError::ParserError(format!( "{} in pipe syntax requires ON or 
USING clause", join_type @@ -11359,7 +11363,11 @@ impl<'a> Parser<'a> { let relation = self.parse_table_factor()?; let constraint = self.parse_join_constraint(false)?; if matches!(constraint, JoinConstraint::None) { - let join_type = if outer { "RIGHT OUTER JOIN" } else { "RIGHT JOIN" }; + let join_type = if outer { + "RIGHT OUTER JOIN" + } else { + "RIGHT JOIN" + }; return Err(ParserError::ParserError(format!( "{} in pipe syntax requires ON or USING clause", join_type diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 38221f178..b9e81f794 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15364,25 +15364,29 @@ fn parse_pipeline_operator() { // join pipe operator - INNER JOIN dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id"); dialects.verified_stmt("SELECT * FROM users |> INNER JOIN orders ON users.id = orders.user_id"); - + // join pipe operator - LEFT JOIN dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id"); - dialects.verified_stmt("SELECT * FROM users |> LEFT OUTER JOIN orders ON users.id = orders.user_id"); - + dialects.verified_stmt( + "SELECT * FROM users |> LEFT OUTER JOIN orders ON users.id = orders.user_id", + ); + // join pipe operator - RIGHT JOIN dialects.verified_stmt("SELECT * FROM users |> RIGHT JOIN orders ON users.id = orders.user_id"); - dialects.verified_stmt("SELECT * FROM users |> RIGHT OUTER JOIN orders ON users.id = orders.user_id"); - + dialects.verified_stmt( + "SELECT * FROM users |> RIGHT OUTER JOIN orders ON users.id = orders.user_id", + ); + // join pipe operator - FULL JOIN dialects.verified_stmt("SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id"); dialects.verified_query_with_canonical( "SELECT * FROM users |> FULL OUTER JOIN orders ON users.id = orders.user_id", "SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id", ); - + // join pipe operator - CROSS JOIN 
dialects.verified_stmt("SELECT * FROM users |> CROSS JOIN orders"); - + // join pipe operator with USING dialects.verified_query_with_canonical( "SELECT * FROM users |> JOIN orders USING (user_id)", @@ -15392,22 +15396,22 @@ fn parse_pipeline_operator() { "SELECT * FROM users |> LEFT JOIN orders USING (user_id, order_date)", "SELECT * FROM users |> LEFT JOIN orders USING(user_id, order_date)", ); - + // join pipe operator with alias dialects.verified_query_with_canonical( "SELECT * FROM users |> JOIN orders o ON users.id = o.user_id", "SELECT * FROM users |> JOIN orders AS o ON users.id = o.user_id", ); dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders AS o ON users.id = o.user_id"); - + // join pipe operator with complex ON condition dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id AND orders.status = 'active'"); dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id AND orders.amount > 100"); - + // multiple join pipe operators dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> JOIN products ON orders.product_id = products.id"); dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id |> RIGHT JOIN products ON orders.product_id = products.id"); - + // join pipe operator with other pipe operators dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> WHERE orders.amount > 100"); dialects.verified_stmt("SELECT * FROM users |> WHERE users.active = true |> LEFT JOIN orders ON users.id = orders.user_id"); @@ -15590,7 +15594,9 @@ fn parse_pipeline_operator_negative_tests() { // Test that CROSS JOIN with ON condition fails assert!(dialects - .parse_sql_statements("SELECT * FROM users |> CROSS JOIN orders ON users.id = orders.user_id") + .parse_sql_statements( + "SELECT * FROM users |> CROSS JOIN orders ON users.id = orders.user_id" + ) .is_err()); // Test that CROSS JOIN 
with USING condition fails