From bbd99a438d155994ebd871fcb0848d45065d0b05 Mon Sep 17 00:00:00 2001 From: lovasoa Date: Sat, 18 Nov 2023 17:30:34 +0100 Subject: [PATCH] add support for CONVERT expressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fixes #1047 adds support for the following CONVERT syntaxes: - `CONVERT('héhé' USING utf8mb4)` (MySQL, Postgres) - `CONVERT('héhé', CHAR CHARACTER SET utf8mb4)` (MySQL) - `CONVERT(DECIMAL(10, 5), 42)` (MSSQL) - the type comes first --- src/ast/mod.rs | 33 +++++++++++++++++++++++++ src/dialect/mod.rs | 5 ++++ src/dialect/mssql.rs | 6 +++++ src/dialect/redshift.rs | 6 +++++ src/parser/mod.rs | 52 ++++++++++++++++++++++++++++++++++++++++ tests/sqlparser_mssql.rs | 7 ++++++ tests/sqlparser_mysql.rs | 15 ++++++++++++ 7 files changed, 124 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 3929d228b..3fcc47fbe 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -473,6 +473,17 @@ pub enum Expr { }, /// Unary operation e.g. `NOT foo` UnaryOp { op: UnaryOperator, expr: Box }, + /// CONVERT a value to a different data type or character encoding `CONVERT(foo USING utf8mb4)` + Convert { + /// The expression to convert + expr: Box, + /// The target data type + data_type: Option, + /// The target character encoding + charset: Option, + /// whether the target comes before the expr (MSSQL syntax) + target_before_value: bool, + }, /// CAST an expression to a different data type e.g. 
`CAST(foo AS VARCHAR(123))` Cast { expr: Box, @@ -844,6 +855,28 @@ impl fmt::Display for Expr { write!(f, "{op}{expr}") } } + Expr::Convert { + expr, + target_before_value, + data_type, + charset, + } => { + write!(f, "CONVERT(")?; + if let Some(data_type) = data_type { + if let Some(charset) = charset { + write!(f, "{expr}, {data_type} CHARACTER SET {charset}") + } else if *target_before_value { + write!(f, "{data_type}, {expr}") + } else { + write!(f, "{expr}, {data_type}") + } + } else if let Some(charset) = charset { + write!(f, "{expr} USING {charset}") + } else { + write!(f, "{expr}") // This should never happen + }?; + write!(f, ")") + } Expr::Cast { expr, data_type, diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index accf61efc..53bb891de 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -128,6 +128,11 @@ pub trait Dialect: Debug + Any { fn supports_in_empty_list(&self) -> bool { false } + /// Returns true if the dialect has a CONVERT function which accepts a type first + /// and an expression second, e.g. 
`CONVERT(varchar, 1)` + fn convert_type_before_value(&self) -> bool { + false + } /// Dialect-specific prefix parser override fn parse_prefix(&self, _parser: &mut Parser) -> Option> { // return None to fall back to the default behavior diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 26ecd4782..c7bf11864 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -35,6 +35,12 @@ impl Dialect for MsSqlDialect { || ch == '_' } + /// SQL Server has `CONVERT(type, value)` instead of `CONVERT(value, type)` + /// + fn convert_type_before_value(&self) -> bool { + true + } + fn supports_substring_from_for_expr(&self) -> bool { false } diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index 73457ab30..8dc7d573a 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -53,4 +53,10 @@ impl Dialect for RedshiftSqlDialect { // Extends Postgres dialect with sharp PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#' } + + /// Redshift has `CONVERT(type, value)` instead of `CONVERT(value, type)` + /// + fn convert_type_before_value(&self) -> bool { + true + } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bad0470c1..5add7952a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -821,6 +821,7 @@ impl<'a> Parser<'a> { self.parse_time_functions(ObjectName(vec![w.to_ident()])) } Keyword::CASE => self.parse_case_expr(), + Keyword::CONVERT => self.parse_convert_expr(), Keyword::CAST => self.parse_cast_expr(), Keyword::TRY_CAST => self.parse_try_cast_expr(), Keyword::SAFE_CAST => self.parse_safe_cast_expr(), @@ -1227,6 +1228,57 @@ impl<'a> Parser<'a> { } } + /// Parse an MSSQL-style `CONVERT(type, value)` call, where the target data type comes before the expression + fn parse_mssql_convert(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let data_type = self.parse_data_type()?; + self.expect_token(&Token::Comma)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Convert { + expr: Box::new(expr), + data_type: Some(data_type), + charset: None, + 
target_before_value: true, + }) + } + + /// Parse a SQL CONVERT function: + /// - `CONVERT('héhé' USING utf8mb4)` (MySQL) + /// - `CONVERT('héhé', CHAR CHARACTER SET utf8mb4)` (MySQL) + /// - `CONVERT(DECIMAL(10, 5), 42)` (MSSQL) - the type comes first + pub fn parse_convert_expr(&mut self) -> Result { + if self.dialect.convert_type_before_value() { + return self.parse_mssql_convert(); + } + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + if self.parse_keyword(Keyword::USING) { + let charset = self.parse_object_name()?; + self.expect_token(&Token::RParen)?; + return Ok(Expr::Convert { + expr: Box::new(expr), + data_type: None, + charset: Some(charset), + target_before_value: false, + }); + } + self.expect_token(&Token::Comma)?; + let data_type = self.parse_data_type()?; + let charset = if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { + Some(self.parse_object_name()?) + } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok(Expr::Convert { + expr: Box::new(expr), + data_type: Some(data_type), + charset, + target_before_value: false, + }) + } + /// Parse a SQL CAST function e.g. 
`CAST(expr AS FLOAT)` pub fn parse_cast_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 59a68d2c8..7d5beca9c 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -475,6 +475,13 @@ fn parse_cast_varchar_max() { ms_and_generic().verified_expr("CAST('foo' AS VARCHAR(MAX))"); } +#[test] +fn parse_convert() { + ms().verified_expr("CONVERT(VARCHAR(MAX), 'foo')"); + ms().verified_expr("CONVERT(VARCHAR(10), 'foo')"); + ms().verified_expr("CONVERT(DECIMAL(10,5), 12.55)"); +} + #[test] fn parse_similar_to() { fn chk(negated: bool) { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index c80003b7d..ce31ce037 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1837,3 +1837,18 @@ fn parse_drop_temporary_table() { _ => unreachable!(), } } + +#[test] +fn parse_convert_using() { + // https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html#function_convert + + // CONVERT(expr USING transcoding_name) + mysql().verified_only_select("SELECT CONVERT('x' USING latin1)"); + mysql().verified_only_select("SELECT CONVERT(my_column USING utf8mb4) FROM my_table"); + + // CONVERT(expr, type) + mysql().verified_only_select("SELECT CONVERT('abc', CHAR(60))"); + mysql().verified_only_select("SELECT CONVERT(123.456, DECIMAL(5,2))"); + // with a type + a charset + mysql().verified_only_select("SELECT CONVERT('test', CHAR CHARACTER SET utf8mb4)"); +}