diff --git a/src/ast/query.rs b/src/ast/query.rs index ffe1e4023..2139b88c2 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2684,6 +2684,79 @@ pub enum PipeOperator { /// Syntax: `|> TABLESAMPLE SYSTEM (10 PERCENT) /// See more at TableSample { sample: Box }, + /// Renames columns in the input table. + /// + /// Syntax: `|> RENAME old_name AS new_name, ...` + /// + /// See more at + Rename { mappings: Vec }, + /// Combines the input table with one or more tables using UNION. + /// + /// Syntax: `|> UNION [ALL|DISTINCT] (), (), ...` + /// + /// See more at + Union { + set_quantifier: SetQuantifier, + queries: Vec, + }, + /// Returns only the rows that are present in both the input table and the specified tables. + /// + /// Syntax: `|> INTERSECT [DISTINCT] (), (), ...` + /// + /// See more at + Intersect { + set_quantifier: SetQuantifier, + queries: Vec, + }, + /// Returns only the rows that are present in the input table but not in the specified tables. + /// + /// Syntax: `|> EXCEPT DISTINCT (), (), ...` + /// + /// See more at + Except { + set_quantifier: SetQuantifier, + queries: Vec, + }, + /// Calls a table function or procedure that returns a table. + /// + /// Syntax: `|> CALL function_name(args) [AS alias]` + /// + /// See more at + Call { + function: Function, + alias: Option, + }, + /// Pivots data from rows to columns. + /// + /// Syntax: `|> PIVOT(aggregate_function(column) FOR pivot_column IN (value1, value2, ...)) [AS alias]` + /// + /// See more at + Pivot { + aggregate_functions: Vec, + value_column: Vec, + value_source: PivotValueSource, + alias: Option, + }, + /// The `UNPIVOT` pipe operator transforms columns into rows. + /// + /// Syntax: + /// ```sql + /// |> UNPIVOT(value_column FOR name_column IN (column1, column2, ...)) [alias] + /// ``` + /// + /// See more at + Unpivot { + value_column: Ident, + name_column: Ident, + unpivot_columns: Vec, + alias: Option, + }, + /// Joins the input table with another table. + /// + /// Syntax: `|> [JOIN_TYPE] JOIN [alias] ON ` or `|> [JOIN_TYPE] JOIN
[alias] USING ()` + /// + /// See more at + Join(Join), } impl fmt::Display for PipeOperator { @@ -2739,7 +2812,91 @@ impl fmt::Display for PipeOperator { PipeOperator::TableSample { sample } => { write!(f, "{}", sample) } + PipeOperator::Rename { mappings } => { + write!(f, "RENAME {}", display_comma_separated(mappings)) + } + PipeOperator::Union { + set_quantifier, + queries, + } => Self::fmt_set_operation(f, "UNION", set_quantifier, queries), + PipeOperator::Intersect { + set_quantifier, + queries, + } => Self::fmt_set_operation(f, "INTERSECT", set_quantifier, queries), + PipeOperator::Except { + set_quantifier, + queries, + } => Self::fmt_set_operation(f, "EXCEPT", set_quantifier, queries), + PipeOperator::Call { function, alias } => { + write!(f, "CALL {}", function)?; + Self::fmt_optional_alias(f, alias) + } + PipeOperator::Pivot { + aggregate_functions, + value_column, + value_source, + alias, + } => { + write!( + f, + "PIVOT({} FOR {} IN ({}))", + display_comma_separated(aggregate_functions), + Expr::CompoundIdentifier(value_column.to_vec()), + value_source + )?; + Self::fmt_optional_alias(f, alias) + } + PipeOperator::Unpivot { + value_column, + name_column, + unpivot_columns, + alias, + } => { + write!( + f, + "UNPIVOT({} FOR {} IN ({}))", + value_column, + name_column, + display_comma_separated(unpivot_columns) + )?; + Self::fmt_optional_alias(f, alias) + } + PipeOperator::Join(join) => write!(f, "{}", join), + } + } +} + +impl PipeOperator { + /// Helper function to format optional alias for pipe operators + fn fmt_optional_alias(f: &mut fmt::Formatter<'_>, alias: &Option) -> fmt::Result { + if let Some(alias) = alias { + write!(f, " AS {}", alias)?; } + Ok(()) + } + + /// Helper function to format set operations (UNION, INTERSECT, EXCEPT) with queries + fn fmt_set_operation( + f: &mut fmt::Formatter<'_>, + operation: &str, + set_quantifier: &SetQuantifier, + queries: &[Query], + ) -> fmt::Result { + write!(f, "{}", operation)?; + match set_quantifier { + SetQuantifier::None => {} + _ => { + write!(f, " {}", set_quantifier)?; + } + } + write!(f, " ")?; + for (i, query) in queries.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "({})", query)?; + } + Ok(()) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3e721072b..5b4260930 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9947,6 +9947,51 @@ impl<'a> Parser<'a> { Ok(IdentWithAlias { ident, alias }) } + /// Parse `identifier [AS] identifier` where the AS keyword is optional + pub fn parse_identifier_with_optional_alias(&mut self) -> Result { + let ident = self.parse_identifier()?; + let _after_as = self.parse_keyword(Keyword::AS); + let alias = self.parse_identifier()?; + Ok(IdentWithAlias { ident, alias }) + } + + /// Parse comma-separated list of parenthesized queries for pipe operators + fn parse_pipe_operator_queries(&mut self) -> Result, ParserError> { + self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let query = parser.parse_query()?; + parser.expect_token(&Token::RParen)?; + Ok(*query) + }) + } + + /// Parse set quantifier for pipe operators that require DISTINCT. E.g. INTERSECT and EXCEPT + fn parse_distinct_required_set_quantifier( + &mut self, + operator_name: &str, + ) -> Result { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + Ok(SetQuantifier::DistinctByName) + } else if self.parse_keyword(Keyword::DISTINCT) { + Ok(SetQuantifier::Distinct) + } else { + Err(ParserError::ParserError(format!( + "{} pipe operator requires DISTINCT modifier", + operator_name + ))) + } + } + + /// Parse optional alias (with or without AS keyword) for pipe operators + fn parse_optional_pipe_alias(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()).transpose() + } else { + // Check if the next token is an identifier (implicit alias) + self.maybe_parse(|parser| parser.parse_identifier()) + } + } + /// Optionally parses an alias for a select list item fn maybe_parse_select_item_alias(&mut self) -> Result, ParserError> { fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { @@ -11076,6 +11121,19 @@ impl<'a> Parser<'a> { Keyword::AGGREGATE, Keyword::ORDER, Keyword::TABLESAMPLE, + Keyword::RENAME, + Keyword::UNION, + Keyword::INTERSECT, + Keyword::EXCEPT, + Keyword::CALL, + Keyword::PIVOT, + Keyword::UNPIVOT, + Keyword::JOIN, + Keyword::INNER, + Keyword::LEFT, + Keyword::RIGHT, + Keyword::FULL, + Keyword::CROSS, ])?; match kw { Keyword::SELECT => { @@ -11142,6 +11200,217 @@ impl<'a> Parser<'a> { let sample = self.parse_table_sample(TableSampleModifier::TableSample)?; pipe_operators.push(PipeOperator::TableSample { sample }); } + Keyword::RENAME => { + let mappings = + self.parse_comma_separated(Parser::parse_identifier_with_optional_alias)?; + pipe_operators.push(PipeOperator::Rename { mappings }); + } + Keyword::UNION => { + let set_quantifier = self.parse_set_quantifier(&Some(SetOperator::Union)); + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Union { + set_quantifier, + queries, + }); + } + Keyword::INTERSECT => { + let set_quantifier = + self.parse_distinct_required_set_quantifier("INTERSECT")?; + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Intersect { + set_quantifier, + queries, + }); + } + Keyword::EXCEPT => { + let set_quantifier = self.parse_distinct_required_set_quantifier("EXCEPT")?; + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Except { + set_quantifier, + queries, + }); + } + Keyword::CALL => { + let function_name = self.parse_object_name(false)?; + let function_expr = self.parse_function(function_name)?; + if let Expr::Function(function) = function_expr { + let alias = self.parse_optional_pipe_alias()?; + pipe_operators.push(PipeOperator::Call { function, alias }); + } else { + return Err(ParserError::ParserError( + "Expected function call after CALL".to_string(), + )); + } + } + Keyword::PIVOT => { + self.expect_token(&Token::LParen)?; + let aggregate_functions = + self.parse_comma_separated(Self::parse_aliased_function_call)?; + self.expect_keyword_is(Keyword::FOR)?; + let value_column = self.parse_period_separated(|p| p.parse_identifier())?; + self.expect_keyword_is(Keyword::IN)?; + + self.expect_token(&Token::LParen)?; + let value_source = if self.parse_keyword(Keyword::ANY) { + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + PivotValueSource::Any(order_by) + } else if self.peek_sub_query() { + PivotValueSource::Subquery(self.parse_query()?) + } else { + PivotValueSource::List( + self.parse_comma_separated(Self::parse_expr_with_alias)?, + ) + }; + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen)?; + + let alias = self.parse_optional_pipe_alias()?; + + pipe_operators.push(PipeOperator::Pivot { + aggregate_functions, + value_column, + value_source, + alias, + }); + } + Keyword::UNPIVOT => { + self.expect_token(&Token::LParen)?; + let value_column = self.parse_identifier()?; + self.expect_keyword(Keyword::FOR)?; + let name_column = self.parse_identifier()?; + self.expect_keyword(Keyword::IN)?; + + self.expect_token(&Token::LParen)?; + let unpivot_columns = self.parse_comma_separated(Parser::parse_identifier)?; + self.expect_token(&Token::RParen)?; + + self.expect_token(&Token::RParen)?; + + let alias = self.parse_optional_pipe_alias()?; + + pipe_operators.push(PipeOperator::Unpivot { + value_column, + name_column, + unpivot_columns, + alias, + }); + } + Keyword::JOIN => { + let relation = self.parse_table_factor()?; + let constraint = self.parse_join_constraint(false)?; + if matches!(constraint, JoinConstraint::None) { + return Err(ParserError::ParserError( + "JOIN in pipe syntax requires ON or USING clause".to_string(), + )); + } + let join_operator = JoinOperator::Join(constraint); + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } + Keyword::INNER => { + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + let constraint = self.parse_join_constraint(false)?; + if matches!(constraint, JoinConstraint::None) { + return Err(ParserError::ParserError( + "INNER JOIN in pipe syntax requires ON or USING clause".to_string(), + )); + } + let join_operator = JoinOperator::Inner(constraint); + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } + Keyword::LEFT => { + let outer = self.parse_keyword(Keyword::OUTER); + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + let constraint = self.parse_join_constraint(false)?; + if matches!(constraint, JoinConstraint::None) { + let join_type = if outer { + "LEFT OUTER JOIN" + } else { + "LEFT JOIN" + }; + return Err(ParserError::ParserError(format!( + "{} in pipe syntax requires ON or USING clause", + join_type + ))); + } + let join_operator = if outer { + JoinOperator::LeftOuter(constraint) + } else { + JoinOperator::Left(constraint) + }; + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } + Keyword::RIGHT => { + let outer = self.parse_keyword(Keyword::OUTER); + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + let constraint = self.parse_join_constraint(false)?; + if matches!(constraint, JoinConstraint::None) { + let join_type = if outer { + "RIGHT OUTER JOIN" + } else { + "RIGHT JOIN" + }; + return Err(ParserError::ParserError(format!( + "{} in pipe syntax requires ON or USING clause", + join_type + ))); + } + let join_operator = if outer { + JoinOperator::RightOuter(constraint) + } else { + JoinOperator::Right(constraint) + }; + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } + Keyword::FULL => { + let _outer = self.parse_keyword(Keyword::OUTER); + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + let constraint = self.parse_join_constraint(false)?; + if matches!(constraint, JoinConstraint::None) { + return Err(ParserError::ParserError( + "FULL JOIN in pipe syntax requires ON or USING clause".to_string(), + )); + } + let join_operator = JoinOperator::FullOuter(constraint); + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } + Keyword::CROSS => { + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + let join_operator = JoinOperator::CrossJoin; + pipe_operators.push(PipeOperator::Join(Join { + relation, + global: false, + join_operator, + })) + } unhandled => { return Err(ParserError::ParserError(format!( "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}" diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a1a8fc3b3..b9e81f794 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15193,10 +15193,431 @@ fn parse_pipeline_operator() { dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50 PERCENT)"); dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); + // rename pipe operator + dialects.verified_stmt("SELECT * FROM users |> RENAME old_name AS new_name"); + dialects.verified_stmt("SELECT * FROM users |> RENAME id AS user_id, name AS user_name"); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> RENAME id user_id", + "SELECT * FROM users |> RENAME id AS user_id", + ); + + // union pipe operator + dialects.verified_stmt("SELECT * FROM users |> UNION ALL (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION (SELECT * FROM admins)"); + + // union pipe operator with multiple queries + dialects.verified_stmt( + "SELECT * FROM users |> UNION ALL (SELECT * FROM admins), (SELECT * FROM guests)", + ); + dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT (SELECT * FROM admins), (SELECT * FROM guests), (SELECT * FROM employees)"); + dialects.verified_stmt( + "SELECT * FROM users |> UNION (SELECT * FROM admins), (SELECT * FROM guests)", + ); + + // union pipe operator with BY NAME modifier + dialects.verified_stmt("SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION ALL BY NAME (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT BY NAME (SELECT * FROM admins)"); + + // union pipe operator with BY NAME and multiple queries + dialects.verified_stmt( + "SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins), (SELECT * FROM guests)", + ); + + // intersect pipe operator (BigQuery requires DISTINCT modifier for INTERSECT) + dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins)"); + + // intersect pipe operator with BY NAME modifier + dialects + .verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins)"); + + // intersect pipe operator with multiple queries + dialects.verified_stmt( + "SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", + ); + + // intersect pipe operator with BY NAME and multiple queries + dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + + // except pipe operator (BigQuery requires DISTINCT modifier for EXCEPT) + dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins)"); + + // except pipe operator with BY NAME modifier + dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins)"); + + // except pipe operator with multiple queries + dialects.verified_stmt( + "SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", + ); + + // except pipe operator with BY NAME and multiple queries + dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + + // call pipe operator + dialects.verified_stmt("SELECT * FROM users |> CALL my_function()"); + dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5, 'test')"); + dialects.verified_stmt( + "SELECT * FROM users |> CALL namespace.function_name(col1, col2, 'literal')", + ); + + // call pipe operator with complex arguments + dialects.verified_stmt("SELECT * FROM users |> CALL transform_data(col1 + col2)"); + dialects.verified_stmt("SELECT * FROM users |> CALL analyze_data('param1', 100, true)"); + + // call pipe operator with aliases + dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) AS al"); + dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5) AS result_table"); + dialects.verified_stmt("SELECT * FROM users |> CALL namespace.func() AS my_alias"); + + // multiple call pipe operators in sequence + dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) |> CALL tvf2(arg2, arg3)"); + dialects.verified_stmt( + "SELECT * FROM data |> CALL transform(col1) |> CALL validate() |> CALL process(param)", + ); + + // multiple call pipe operators with aliases + dialects.verified_stmt( + "SELECT * FROM input_table |> CALL tvf1(arg1) AS step1 |> CALL tvf2(arg2) AS step2", + ); + dialects.verified_stmt( + "SELECT * FROM data |> CALL preprocess() AS clean_data |> CALL analyze(mode) AS results", + ); + + // call pipe operators mixed with other pipe operators + dialects.verified_stmt( + "SELECT * FROM users |> CALL transform() |> WHERE status = 'active' |> CALL process(param)", + ); + dialects.verified_stmt( + "SELECT * FROM data |> CALL preprocess() AS clean |> SELECT col1, col2 |> CALL validate()", + ); + + // pivot pipe operator + dialects.verified_stmt( + "SELECT * FROM monthly_sales |> PIVOT(SUM(amount) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))", + ); + dialects.verified_stmt("SELECT * FROM sales_data |> PIVOT(AVG(revenue) FOR region IN ('North', 'South', 'East', 'West'))"); + + // pivot pipe operator with multiple aggregate functions + dialects.verified_stmt("SELECT * FROM data |> PIVOT(SUM(sales) AS total_sales, COUNT(*) AS num_transactions FOR month IN ('Jan', 'Feb', 'Mar'))"); + + // pivot pipe operator with compound column names + dialects.verified_stmt("SELECT * FROM sales |> PIVOT(SUM(amount) FOR product.category IN ('Electronics', 'Clothing'))"); + + // pivot pipe operator mixed with other pipe operators + dialects.verified_stmt("SELECT * FROM sales_data |> WHERE year = 2023 |> PIVOT(SUM(revenue) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))"); + + // pivot pipe operator with aliases + dialects.verified_stmt("SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales"); + dialects.verified_stmt("SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category"); + dialects.verified_stmt("SELECT * FROM sales |> PIVOT(COUNT(*) AS transactions, SUM(amount) AS total FOR region IN ('North', 'South')) AS regional_summary"); + + // pivot pipe operator with implicit aliases (without AS keyword) + dialects.verified_query_with_canonical( + "SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) quarterly_sales", + "SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales", + ); + dialects.verified_query_with_canonical( + "SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) avg_by_category", + "SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category", + ); + + // unpivot pipe operator basic usage + dialects + .verified_stmt("SELECT * FROM sales |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); + dialects.verified_stmt("SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C))"); + dialects.verified_stmt( + "SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN (cpu, memory, disk))", + ); + + // unpivot pipe operator with multiple columns + dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (jan, feb, mar, apr, may, jun))"); + dialects.verified_stmt( + "SELECT * FROM report |> UNPIVOT(score FOR subject IN (math, science, english, history))", + ); + + // unpivot pipe operator mixed with other pipe operators + dialects.verified_stmt("SELECT * FROM sales_data |> WHERE year = 2023 |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); + + // unpivot pipe operator with aliases + dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales"); + dialects.verified_stmt( + "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", + ); + dialects.verified_stmt("SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN (cpu, memory)) AS metric_measurements"); + + // unpivot pipe operator with implicit aliases (without AS keyword) + dialects.verified_query_with_canonical( + "SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) unpivoted_sales", + "SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales", + ); + dialects.verified_query_with_canonical( + "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) transformed_data", + "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", + ); + // many pipes dialects.verified_stmt( "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", ); + + // join pipe operator - INNER JOIN + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM users |> INNER JOIN orders ON users.id = orders.user_id"); + + // join pipe operator - LEFT JOIN + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt( + "SELECT * FROM users |> LEFT OUTER JOIN orders ON users.id = orders.user_id", + ); + + // join pipe operator - RIGHT JOIN + dialects.verified_stmt("SELECT * FROM users |> RIGHT JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt( + "SELECT * FROM users |> RIGHT OUTER JOIN orders ON users.id = orders.user_id", + ); + + // join pipe operator - FULL JOIN + dialects.verified_stmt("SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id"); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> FULL OUTER JOIN orders ON users.id = orders.user_id", + "SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id", + ); + + // join pipe operator - CROSS JOIN + dialects.verified_stmt("SELECT * FROM users |> CROSS JOIN orders"); + + // join pipe operator with USING + dialects.verified_query_with_canonical( + "SELECT * FROM users |> JOIN orders USING (user_id)", + "SELECT * FROM users |> JOIN orders USING(user_id)", + ); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> LEFT JOIN orders USING (user_id, order_date)", + "SELECT * FROM users |> LEFT JOIN orders USING(user_id, order_date)", + ); + + // join pipe operator with alias + dialects.verified_query_with_canonical( + "SELECT * FROM users |> JOIN orders o ON users.id = o.user_id", + "SELECT * FROM users |> JOIN orders AS o ON users.id = o.user_id", + ); + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders AS o ON users.id = o.user_id"); + + // join pipe operator with complex ON condition + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id AND orders.status = 'active'"); + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id AND orders.amount > 100"); + + // multiple join pipe operators + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> JOIN products ON orders.product_id = products.id"); + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id |> RIGHT JOIN products ON orders.product_id = products.id"); + + // join pipe operator with other pipe operators + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> WHERE orders.amount > 100"); + dialects.verified_stmt("SELECT * FROM users |> WHERE users.active = true |> LEFT JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> SELECT users.name, orders.amount"); +} + +#[test] +fn parse_pipeline_operator_negative_tests() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + + // Test that plain EXCEPT without DISTINCT fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> EXCEPT (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that EXCEPT ALL fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> EXCEPT ALL (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that EXCEPT BY NAME without DISTINCT fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> EXCEPT BY NAME (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that EXCEPT ALL BY NAME fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements( + "SELECT * FROM users |> EXCEPT ALL BY NAME (SELECT * FROM admins)" + ) + .unwrap_err() + ); + + // Test that plain INTERSECT without DISTINCT fails + assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> INTERSECT (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that INTERSECT ALL fails + assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> INTERSECT ALL (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that INTERSECT BY NAME without DISTINCT fails + assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> INTERSECT BY NAME (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that INTERSECT ALL BY NAME fails + assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements( + "SELECT * FROM users |> INTERSECT ALL BY NAME (SELECT * FROM admins)" + ) + .unwrap_err() + ); + + // Test that CALL without function name fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL") + .is_err()); + + // Test that CALL without parentheses fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL my_function") + .is_err()); + + // Test that CALL with invalid function syntax fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL 123invalid") + .is_err()); + + // Test that CALL with malformed arguments fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL my_function(,)") + .is_err()); + + // Test that CALL with invalid alias syntax fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL my_function() AS") + .is_err()); + + // Test that PIVOT without parentheses fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT SUM(amount) FOR month IN ('Jan')") + .is_err()); + + // Test that PIVOT without FOR keyword fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) month IN ('Jan'))") + .is_err()); + + // Test that PIVOT without IN keyword fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month ('Jan'))") + .is_err()); + + // Test that PIVOT with empty IN list fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month IN ())") + .is_err()); + + // Test that PIVOT with invalid alias syntax fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month IN ('Jan')) AS") + .is_err()); + + // Test UNPIVOT negative cases + + // Test that UNPIVOT without parentheses fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT value FOR name IN col1, col2") + .is_err()); + + // Test that UNPIVOT without FOR keyword fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value name IN (col1, col2))") + .is_err()); + + // Test that UNPIVOT without IN keyword fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name (col1, col2))") + .is_err()); + + // Test that UNPIVOT with missing value column fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(FOR name IN (col1, col2))") + .is_err()); + + // Test that UNPIVOT with missing name column fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR IN (col1, col2))") + .is_err()); + + // Test that UNPIVOT with empty IN list fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN ())") + .is_err()); + + // Test that UNPIVOT with invalid alias syntax fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN (col1, col2)) AS") + .is_err()); + + // Test that UNPIVOT with missing closing parenthesis fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN (col1, col2)") + .is_err()); + + // Test that JOIN without table name fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN ON users.id = orders.user_id") + .is_err()); + + // Test that JOIN without ON or USING condition fails (except CROSS JOIN) + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN orders") + .is_err()); + + // Test that CROSS JOIN with ON condition fails + assert!(dialects + .parse_sql_statements( + "SELECT * FROM users |> CROSS JOIN orders ON users.id = orders.user_id" + ) + .is_err()); + + // Test that CROSS JOIN with USING condition fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CROSS JOIN orders USING (user_id)") + .is_err()); + + // Test that JOIN with empty USING list fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN orders USING ()") + .is_err()); + + // Test that JOIN with malformed ON condition fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN orders ON") + .is_err()); + + // Test that JOIN with invalid USING syntax fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN orders USING user_id") + .is_err()); } #[test]