From c64b31e918622492232b1bc86a84440207897287 Mon Sep 17 00:00:00 2001 From: Ayman Elkfrawy Date: Thu, 21 Nov 2024 12:58:12 -0800 Subject: [PATCH 1/2] generalize struct support and add databricks --- src/ast/mod.rs | 6 ++--- src/dialect/bigquery.rs | 10 ++++++++ src/dialect/databricks.rs | 5 ++++ src/dialect/generic.rs | 8 +++++++ src/dialect/mod.rs | 20 ++++++++++++++++ src/parser/mod.rs | 29 ++++++++++++---------- tests/sqlparser_databricks.rs | 45 +++++++++++++++++++++++++++++++++++ 7 files changed, 108 insertions(+), 15 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e52251d52..9a202e202 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -931,16 +931,16 @@ pub enum Expr { Rollup(Vec>), /// ROW / TUPLE a single value, such as `SELECT (1, 2)` Tuple(Vec), - /// `BigQuery` specific `Struct` literal expression [1] + /// `Struct` literal expression /// Syntax: /// ```sql /// STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) /// ``` - /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Struct { /// Struct values. values: Vec, - /// Struct field definitions. + /// BigQuery specific: Struct field definitions. 
+ /// see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type fields: Vec, }, /// `BigQuery` specific: An named expression in a typeless struct [1] diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 96633552b..9c519435f 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -72,4 +72,14 @@ impl Dialect for BigQueryDialect { fn require_interval_qualifier(&self) -> bool { true } + + // See https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#constructing_a_struct + fn supports_struct_literal(&self) -> bool { + true + } + + // See https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax + fn supports_typed_struct_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 4924e8077..a3476b1b8 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -59,4 +59,9 @@ impl Dialect for DatabricksDialect { fn require_interval_qualifier(&self) -> bool { true } + + // See https://docs.databricks.com/en/sql/language-manual/functions/struct.html + fn supports_struct_literal(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index e3beeae7f..73402b8cb 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -123,4 +123,12 @@ impl Dialect for GenericDialect { fn supports_named_fn_args_with_assignment_operator(&self) -> bool { true } + + fn supports_struct_literal(&self) -> bool { + true + } + + fn supports_typed_struct_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index a8993e685..b4f91de73 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -375,6 +375,26 @@ pub trait Dialect: Debug + Any { false } + /// Return true if the dialect supports the STRUCT literal + /// + /// Example + /// ```sql + /// SELECT STRUCT(1 as one, 'foo' as foo, false) + /// ``` + fn supports_struct_literal(&self) -> bool 
{ + false + } + + /// Return true if the dialect supports typed struct syntax + /// + /// Example for bigquery + /// ```sql + /// SELECT STRUCT(1, 'foo') + /// ``` + fn supports_typed_struct_syntax(&self) -> bool { + false + } + /// Dialect-specific infix parser override /// /// This method is called to parse the next infix expression. diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 16362ebba..065b6cfd6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1123,9 +1123,8 @@ impl<'a> Parser<'a> { Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { Ok(Some(self.parse_match_against()?)) } - Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { - self.prev_token(); - Ok(Some(self.parse_bigquery_struct_literal()?)) + Keyword::STRUCT if self.dialect.supports_struct_literal() => { + Ok(Some(self.parse_struct_literal()?)) } Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => { let expr = self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?; @@ -2383,22 +2382,28 @@ impl<'a> Parser<'a> { } } - /// Bigquery specific: Parse a struct literal /// Syntax /// ```sql - /// -- typed + /// -- typed, specific to bigquery /// STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) /// -- typeless /// STRUCT( expr1 [AS field_name] [, ... 
]) /// ``` - fn parse_bigquery_struct_literal(&mut self) -> Result { - let (fields, trailing_bracket) = - self.parse_struct_type_def(Self::parse_struct_field_def)?; - if trailing_bracket.0 { - return parser_err!( + fn parse_struct_literal(&mut self) -> Result { + let mut fields = vec![]; + // Typed struct syntax is only supported by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax + if self.dialect.supports_typed_struct_syntax() { + self.prev_token(); + let trailing_bracket; + (fields, trailing_bracket) = + self.parse_struct_type_def(Self::parse_struct_field_def)?; + if trailing_bracket.0 { + return parser_err!( "unmatched > in STRUCT literal", self.peek_token().span.start ); + } } self.expect_token(&Token::LParen)?; @@ -2409,13 +2414,13 @@ impl<'a> Parser<'a> { Ok(Expr::Struct { values, fields }) } - /// Parse an expression value for a bigquery struct [1] + /// Parse an expression value for a struct literal /// Syntax /// ```sql /// expr [AS name] /// ``` /// - /// Parameter typed_syntax is set to true if the expression + /// For BigQuery [1], parameter typed_syntax is set to true if the expression /// is to be parsed as a field expression declared using typed /// struct syntax [2], and false if using typeless struct syntax [3].
/// diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 1651d517a..7d01c14f1 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -278,3 +278,48 @@ fn parse_use() { ); } } + +#[test] +fn parse_databricks_struct_function() { + assert_eq!( + databricks() + .verified_only_select("SELECT STRUCT(1, 'foo')") + .projection[0], + SelectItem::UnnamedExpr(Expr::Struct { + values: vec![ + Expr::Value(number("1")), + Expr::Value(Value::SingleQuotedString("foo".to_string())) + ], + fields: vec![] + }) + ); + assert_eq!( + databricks() + .verified_only_select("SELECT STRUCT(1 AS one, 'foo' AS foo, false)") + .projection[0], + SelectItem::UnnamedExpr(Expr::Struct { + values: vec![ + Expr::Named { + expr: Expr::Value(number("1")).into(), + name: Ident::new("one") + }, + Expr::Named { + expr: Expr::Value(Value::SingleQuotedString("foo".to_string())).into(), + name: Ident::new("foo") + }, + Expr::Value(Value::Boolean(false)) + ], + fields: vec![] + }) + ); +} + +#[test] +fn parse_invalid_struct_function() { + assert_eq!( + databricks() + .parse_sql_statements("SELECT STRUCT(1)") // This works only in BigQuery + .unwrap_err(), + ParserError::ParserError("Expected: (, found: <".to_string()) + ); +} From fc553aa466477bc34a68f51458948f0842bbb415 Mon Sep 17 00:00:00 2001 From: Ayman Elkfrawy Date: Sat, 23 Nov 2024 13:42:05 -0800 Subject: [PATCH 2/2] address comments and remove typed struct flag --- src/ast/mod.rs | 6 ++++-- src/dialect/bigquery.rs | 5 ----- src/dialect/generic.rs | 4 ---- src/dialect/mod.rs | 10 ---------- src/parser/mod.rs | 20 ++++++++------------ tests/sqlparser_databricks.rs | 14 ++------------ 6 files changed, 14 insertions(+), 45 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9a202e202..d928370a0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -935,12 +935,14 @@ pub enum Expr { /// Syntax: /// ```sql /// STRUCT<[field_name] field_type, ...>( expr1 [, ... 
]) /// ``` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type) + /// [Databricks](https://docs.databricks.com/en/sql/language-manual/functions/struct.html) Struct { /// Struct values. values: Vec, - /// BigQuery specific: Struct field definitions. - /// see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type + /// Struct field definitions. fields: Vec, }, /// `BigQuery` specific: An named expression in a typeless struct [1] diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 9c519435f..66d7d2061 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -77,9 +77,4 @@ impl Dialect for BigQueryDialect { fn supports_struct_literal(&self) -> bool { true } - - // See https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax - fn supports_typed_struct_syntax(&self) -> bool { - true - } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 73402b8cb..61e5070fb 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -127,8 +127,4 @@ impl Dialect for GenericDialect { fn supports_struct_literal(&self) -> bool { true } - - fn supports_typed_struct_syntax(&self) -> bool { - true - } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index b4f91de73..f40cba719 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -385,16 +385,6 @@ pub trait Dialect: Debug + Any { false } - /// Return true if the dialect supports typed struct syntax - /// - /// Example for bigquery - /// ```sql - /// SELECT STRUCT(1, 'foo') - /// ``` - fn supports_typed_struct_syntax(&self) -> bool { - false - } - /// Dialect-specific infix parser override /// /// This method is called to parse the next infix expression.
diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 065b6cfd6..831098ba1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2384,28 +2384,24 @@ impl<'a> Parser<'a> { /// Syntax /// ```sql - /// -- typed, specific to bigquery + /// -- typed /// STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) /// -- typeless /// STRUCT( expr1 [AS field_name] [, ... ]) /// ``` fn parse_struct_literal(&mut self) -> Result { - let mut fields = vec![]; - // Typed struct syntax is only supported by BigQuery - // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax - if self.dialect.supports_typed_struct_syntax() { - self.prev_token(); - let trailing_bracket; - (fields, trailing_bracket) = - self.parse_struct_type_def(Self::parse_struct_field_def)?; - if trailing_bracket.0 { - return parser_err!( + // Parse the fields definition if exist `<[field_name] field_type, ...>` + self.prev_token(); + let (fields, trailing_bracket) = + self.parse_struct_type_def(Self::parse_struct_field_def)?; + if trailing_bracket.0 { + return parser_err!( "unmatched > in STRUCT literal", self.peek_token().span.start ); - } } + // Parse the struct values `(expr1 [, ... 
])` self.expect_token(&Token::LParen)?; let values = self .parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))?; diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 7d01c14f1..d73c088a7 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -282,7 +282,7 @@ fn parse_use() { #[test] fn parse_databricks_struct_function() { assert_eq!( - databricks() + databricks_and_generic() .verified_only_select("SELECT STRUCT(1, 'foo')") .projection[0], SelectItem::UnnamedExpr(Expr::Struct { @@ -294,7 +294,7 @@ fn parse_databricks_struct_function() { }) ); assert_eq!( - databricks() + databricks_and_generic() .verified_only_select("SELECT STRUCT(1 AS one, 'foo' AS foo, false)") .projection[0], SelectItem::UnnamedExpr(Expr::Struct { @@ -313,13 +313,3 @@ fn parse_databricks_struct_function() { }) ); } - -#[test] -fn parse_invalid_struct_function() { - assert_eq!( - databricks() - .parse_sql_statements("SELECT STRUCT(1)") // This works only in BigQuery - .unwrap_err(), - ParserError::ParserError("Expected: (, found: <".to_string()) - ); -}