From 9eba94c471356b28b65b36072faeb6fe3d513c9b Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 4 Feb 2025 12:13:55 +0100 Subject: [PATCH 1/3] Require space after -- to start single line comment in MySQL --- src/dialect/mod.rs | 9 +++++++ src/dialect/mysql.rs | 9 +++++++ src/tokenizer.rs | 62 ++++++++++++++++++++++++++++++++++++++------ 3 files changed, 72 insertions(+), 8 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index b648869d2..d008a3906 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -880,6 +880,15 @@ pub trait Dialect: Debug + Any { fn supports_table_hints(&self) -> bool { false } + + /// Returns whether it's the start of a single line comment + /// e.g. MySQL requires a space after `--` to be a single line comment + /// Otherwise it's interpreted as a double minus operator + /// + /// MySQL: + fn is_start_of_single_line_comment(&self, _ch: char) -> bool { + true + } } /// This represents the operators for which precedence must be defined diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index a67fe67b0..3aaef6154 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -125,6 +125,15 @@ impl Dialect for MySqlDialect { fn supports_table_hints(&self) -> bool { true } + + /// Returns whether it's the start of a single line comment + /// e.g. MySQL requires a space after `--` to be a single line comment + /// Otherwise it's interpreted as a double minus operator + /// + /// MySQL: + fn is_start_of_single_line_comment(&self, _ch: char) -> bool { + _ch == ' ' + } } /// `LOCK TABLES` diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 7742e8fae..ffe912ebe 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1229,15 +1229,26 @@ impl<'a> Tokenizer<'a> { // operators '-' => { chars.next(); // consume the '-' - match chars.peek() { - Some('-') => { - chars.next(); // consume the second '-', starting a single-line comment - let comment = self.tokenize_single_line_comment(chars); - Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_owned(), - comment, - }))) + + // Potential start of a single-line comment + if let Some('-') = chars.peek() { + if let Some(next_char) = chars.peekable.clone().nth(1) { + // MySQL requires a space after the -- for a single-line comment + // Otherwise it's interpreted as two minus signs + // e.g. UPDATE account SET balance=balance--1 + // WHERE account_id=5752; + if self.dialect.is_start_of_single_line_comment(next_char) { + chars.next(); // consume second '-' + let comment = self.tokenize_single_line_comment(chars); + return Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_owned(), + comment, + }))); + } } + } + + match chars.peek() { Some('>') => { chars.next(); match chars.peek() { @@ -3685,4 +3696,39 @@ mod tests { ], ); } + + #[test] + fn test_mysql_space_after_single_line_comment_missing() { + let sql = "SELECT --'abc' FROM DUAL"; + let dialect = MySqlDialect {}; + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Minus, + Token::Minus, + Token::SingleQuotedString("abc".to_string()), + Token::Whitespace(Whitespace::Space), + Token::make_keyword("FROM"), + Token::Whitespace(Whitespace::Space), + Token::make_word("DUAL", None), + ]; + compare(expected, tokens); + } + + #[test] + fn test_mysql_space_after_single_line_comment_present() { + let sql = "SELECT -- 'abc' FROM DUAL"; + let dialect = MySqlDialect {}; + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: " 'abc' FROM DUAL".to_string(), + }), + ]; + compare(expected, tokens); + } } From 2b4b6ceaaee97d6ef0c0167608f8d8c2ec66725b Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Wed, 5 Feb 2025 00:40:35 +0100 Subject: [PATCH 2/3] Refactor and improve tests --- src/dialect/mod.rs | 4 +- src/dialect/mysql.rs | 4 +- src/tokenizer.rs | 121 +++++++++++++++++++++++++++------------ tests/sqlparser_mysql.rs | 59 +++++++++++++++++++ 4 files changed, 146 insertions(+), 42 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index d008a3906..fdc90fd89 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -886,8 +886,8 @@ pub trait Dialect: Debug + Any { /// Otherwise it's interpreted as a double minus operator /// /// MySQL: - fn is_start_of_single_line_comment(&self, _ch: char) -> bool { - true + fn requires_whitespace_to_start_comment(&self) -> bool { + false } } diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 3aaef6154..363ac9316 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -131,8 +131,8 @@ impl Dialect for MySqlDialect { /// Otherwise it's interpreted as a double minus operator /// /// MySQL: - fn is_start_of_single_line_comment(&self, _ch: char) -> bool { - _ch == ' ' + fn requires_whitespace_to_start_comment(&self) -> bool { + true } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ffe912ebe..f8e29cf3f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1230,22 +1230,27 @@ impl<'a> Tokenizer<'a> { '-' => { chars.next(); // consume the '-' - // Potential start of a single-line comment if let Some('-') = chars.peek() { - if let Some(next_char) = chars.peekable.clone().nth(1) { + let mut is_comment = true; + if self.dialect.requires_whitespace_to_start_comment() { // MySQL requires a space after the -- for a single-line comment // Otherwise it's interpreted as two minus signs // e.g. UPDATE account SET balance=balance--1 // WHERE account_id=5752; - if self.dialect.is_start_of_single_line_comment(next_char) { - chars.next(); // consume second '-' - let comment = self.tokenize_single_line_comment(chars); - return Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_owned(), - comment, - }))); + match chars.peekable.clone().nth(1) { + Some(' ') => (), + _ => is_comment = false, } } + + if is_comment { + chars.next(); // consume second '-' + let comment = self.tokenize_single_line_comment(chars); + return Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_owned(), + comment, + }))); + } } match chars.peek() { @@ -3698,37 +3703,77 @@ mod tests { } #[test] - fn test_mysql_space_after_single_line_comment_missing() { - let sql = "SELECT --'abc' FROM DUAL"; - let dialect = MySqlDialect {}; - let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); - let expected = vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Minus, - Token::Minus, - Token::SingleQuotedString("abc".to_string()), - Token::Whitespace(Whitespace::Space), - Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), - Token::make_word("DUAL", None), - ]; - compare(expected, tokens); + fn test_whitespace_required_after_single_line_comment() { + all_dialects_where(|dialect| dialect.requires_whitespace_to_start_comment()).tokenizes_to( + "SELECT --'abc'", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Minus, + Token::Minus, + Token::SingleQuotedString("abc".to_string()), + ], + ); + + all_dialects_where(|dialect| dialect.requires_whitespace_to_start_comment()).tokenizes_to( + "SELECT -- 'abc'", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: " 'abc'".to_string(), + }), + ], + ); + + all_dialects_where(|dialect| dialect.requires_whitespace_to_start_comment()).tokenizes_to( + "SELECT --", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Minus, + Token::Minus, + ], + ); } #[test] - fn test_mysql_space_after_single_line_comment_present() { - let sql = "SELECT -- 'abc' FROM DUAL"; - let dialect = MySqlDialect {}; - let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); - let expected = vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: " 'abc' FROM DUAL".to_string(), - }), - ]; - compare(expected, tokens); + fn test_whitespace_not_required_after_single_line_comment() { + all_dialects_where(|dialect| !dialect.requires_whitespace_to_start_comment()).tokenizes_to( + "SELECT --'abc'", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: "'abc'".to_string(), + }), + ], + ); + + all_dialects_where(|dialect| !dialect.requires_whitespace_to_start_comment()).tokenizes_to( + "SELECT -- 'abc'", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: " 'abc'".to_string(), + }), + ], + ); + + all_dialects_where(|dialect| !dialect.requires_whitespace_to_start_comment()).tokenizes_to( + "SELECT --", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: "".to_string(), + }), + ], + ); } } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 2e6dfc72b..fa4863fad 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -3244,3 +3244,62 @@ fn parse_double_precision() { "CREATE TABLE foo (bar DOUBLE(11,0))", ); } + +#[test] +fn parse_looks_like_single_line_comment() { + // See https://dev.mysql.com/doc/refman/8.4/en/ansi-diff-comments.html + match mysql().parse_sql_statements( + r#" + UPDATE account SET balance=balance--1 + WHERE account_id=5752 + "#, + ) { + Ok(statement) => match statement.first() { + Some(Statement::Update { assignments, .. }) => { + assert_eq!(assignments.len(), 1); + assert_eq!( + assignments[0], + Assignment { + value: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("balance"))), + op: BinaryOperator::Minus, + right: Box::new(Expr::UnaryOp { + op: UnaryOperator::Minus, + expr: Box::new(Expr::Value(number("1"))) + }), + }, + target: AssignmentTarget::ColumnName(ObjectName::from(vec![Ident::new( + "balance".to_string() + )])), + } + ); + } + _ => panic!("expected update statement"), + }, + _ => panic!("expected error"), + } + + match mysql().parse_sql_statements( + r#" + UPDATE account SET balance=balance-- 1 + WHERE account_id=5752 + "#, + ) { + Ok(statement) => match statement.first() { + Some(Statement::Update { assignments, .. }) => { + assert_eq!(assignments.len(), 1); + assert_eq!( + assignments[0], + Assignment { + value: Expr::Identifier(Ident::new("balance")), + target: AssignmentTarget::ColumnName(ObjectName::from(vec![Ident::new( + "balance".to_string() + )])), + } + ); + } + _ => panic!("expected update statement"), + }, + _ => panic!("expected error"), + } +} From 4a90098511232373b50af03a1600f4972608707c Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Wed, 5 Feb 2025 17:12:01 +0100 Subject: [PATCH 3/3] Feedback --- src/dialect/mod.rs | 8 +- src/dialect/mysql.rs | 7 +- src/tokenizer.rs | 170 ++++++++++++++++++++------------------- tests/sqlparser_mysql.rs | 58 ++----------- 4 files changed, 98 insertions(+), 145 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index fdc90fd89..135fd8670 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -881,12 +881,12 @@ pub trait Dialect: Debug + Any { false } - /// Returns whether it's the start of a single line comment - /// e.g. MySQL requires a space after `--` to be a single line comment - /// Otherwise it's interpreted as a double minus operator + /// Returns true if this dialect requires a whitespace character after `--` to start a single line comment. /// /// MySQL: - fn requires_whitespace_to_start_comment(&self) -> bool { + /// e.g. UPDATE account SET balance=balance--1 + // WHERE account_id=5752 ^^^ will be interpreted as two minus signs instead of a comment + fn requires_single_line_comment_whitespace(&self) -> bool { false } } diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 363ac9316..55b91ad22 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -126,12 +126,7 @@ impl Dialect for MySqlDialect { true } - /// Returns whether it's the start of a single line comment - /// e.g. MySQL requires a space after `--` to be a single line comment - /// Otherwise it's interpreted as a double minus operator - /// - /// MySQL: - fn requires_whitespace_to_start_comment(&self) -> bool { + fn requires_single_line_comment_whitespace(&self) -> bool { true } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f8e29cf3f..d4e530c9d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1230,30 +1230,26 @@ impl<'a> Tokenizer<'a> { '-' => { chars.next(); // consume the '-' - if let Some('-') = chars.peek() { - let mut is_comment = true; - if self.dialect.requires_whitespace_to_start_comment() { - // MySQL requires a space after the -- for a single-line comment - // Otherwise it's interpreted as two minus signs - // e.g. UPDATE account SET balance=balance--1 - // WHERE account_id=5752; - match chars.peekable.clone().nth(1) { - Some(' ') => (), - _ => is_comment = false, + match chars.peek() { + Some('-') => { + let mut is_comment = true; + if self.dialect.requires_single_line_comment_whitespace() { + is_comment = Some(' ') == chars.peekable.clone().nth(1); } - } - if is_comment { - chars.next(); // consume second '-' - let comment = self.tokenize_single_line_comment(chars); - return Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_owned(), - comment, - }))); - } - } + if is_comment { + chars.next(); // consume second '-' + let comment = self.tokenize_single_line_comment(chars); + return Ok(Some(Token::Whitespace( + Whitespace::SingleLineComment { + prefix: "--".to_owned(), + comment, + }, + ))); + } - match chars.peek() { + self.start_binop(chars, "-", Token::Minus) + } Some('>') => { chars.next(); match chars.peek() { @@ -3704,76 +3700,82 @@ mod tests { #[test] fn test_whitespace_required_after_single_line_comment() { - all_dialects_where(|dialect| dialect.requires_whitespace_to_start_comment()).tokenizes_to( - "SELECT --'abc'", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Minus, - Token::Minus, - Token::SingleQuotedString("abc".to_string()), - ], - ); + all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace()) + .tokenizes_to( + "SELECT --'abc'", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Minus, + Token::Minus, + Token::SingleQuotedString("abc".to_string()), + ], + ); - all_dialects_where(|dialect| dialect.requires_whitespace_to_start_comment()).tokenizes_to( - "SELECT -- 'abc'", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: " 'abc'".to_string(), - }), - ], - ); + all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace()) + .tokenizes_to( + "SELECT -- 'abc'", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: " 'abc'".to_string(), + }), + ], + ); - all_dialects_where(|dialect| dialect.requires_whitespace_to_start_comment()).tokenizes_to( - "SELECT --", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Minus, - Token::Minus, - ], - ); + all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace()) + .tokenizes_to( + "SELECT --", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Minus, + Token::Minus, + ], + ); } #[test] fn test_whitespace_not_required_after_single_line_comment() { - all_dialects_where(|dialect| !dialect.requires_whitespace_to_start_comment()).tokenizes_to( - "SELECT --'abc'", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "'abc'".to_string(), - }), - ], - ); + all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace()) + .tokenizes_to( + "SELECT --'abc'", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: "'abc'".to_string(), + }), + ], + ); - all_dialects_where(|dialect| !dialect.requires_whitespace_to_start_comment()).tokenizes_to( - "SELECT -- 'abc'", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: " 'abc'".to_string(), - }), - ], - ); + all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace()) + .tokenizes_to( + "SELECT -- 'abc'", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: " 'abc'".to_string(), + }), + ], + ); - all_dialects_where(|dialect| !dialect.requires_whitespace_to_start_comment()).tokenizes_to( - "SELECT --", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "".to_string(), - }), - ], - ); + all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace()) + .tokenizes_to( + "SELECT --", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: "".to_string(), + }), + ], + ); } } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index fa4863fad..93237bbfa 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -3247,59 +3247,15 @@ fn parse_double_precision() { #[test] fn parse_looks_like_single_line_comment() { - // See https://dev.mysql.com/doc/refman/8.4/en/ansi-diff-comments.html - match mysql().parse_sql_statements( - r#" - UPDATE account SET balance=balance--1 - WHERE account_id=5752 - "#, - ) { - Ok(statement) => match statement.first() { - Some(Statement::Update { assignments, .. }) => { - assert_eq!(assignments.len(), 1); - assert_eq!( - assignments[0], - Assignment { - value: Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("balance"))), - op: BinaryOperator::Minus, - right: Box::new(Expr::UnaryOp { - op: UnaryOperator::Minus, - expr: Box::new(Expr::Value(number("1"))) - }), - }, - target: AssignmentTarget::ColumnName(ObjectName::from(vec![Ident::new( - "balance".to_string() - )])), - } - ); - } - _ => panic!("expected update statement"), - }, - _ => panic!("expected error"), - } - - match mysql().parse_sql_statements( + mysql().one_statement_parses_to( + "UPDATE account SET balance=balance--1 WHERE account_id=5752", + "UPDATE account SET balance = balance - -1 WHERE account_id = 5752", + ); + mysql().one_statement_parses_to( r#" UPDATE account SET balance=balance-- 1 WHERE account_id=5752 "#, - ) { - Ok(statement) => match statement.first() { - Some(Statement::Update { assignments, .. }) => { - assert_eq!(assignments.len(), 1); - assert_eq!( - assignments[0], - Assignment { - value: Expr::Identifier(Ident::new("balance")), - target: AssignmentTarget::ColumnName(ObjectName::from(vec![Ident::new( - "balance".to_string() - )])), - } - ); - } - _ => panic!("expected update statement"), - }, - _ => panic!("expected error"), - } + "UPDATE account SET balance = balance WHERE account_id = 5752", + ); }