Skip to content

Commit b693c8d

Browse files
committed
Introduce require_semicolon_statement_delimiter parser option
- plus, a corresponding `supports_statements_without_semicolon_delimiter` Dialect trait function
- semicolon delimiters are optional for SQL Server, so the function is set to true for that dialect
- for the implementation, `RETURN` parsing needs to be tightened up to avoid ambiguity, and tests that formerly asserted "end of statement" may now need to assert "an SQL statement"
- a new `assert_err_parse_statements` helper splits the dialects based on semicolon requirements and asserts the expected error message accordingly
1 parent fc95b8f commit b693c8d

File tree

8 files changed

+633
-131
lines changed

8 files changed

+633
-131
lines changed

src/dialect/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,6 +1021,11 @@ pub trait Dialect: Debug + Any {
10211021
fn supports_set_names(&self) -> bool {
10221022
false
10231023
}
1024+
1025+
/// Returns true if the dialect supports parsing statements without a semicolon delimiter.
1026+
fn supports_statements_without_semicolon_delimiter(&self) -> bool {
1027+
false
1028+
}
10241029
}
10251030

10261031
/// This represents the operators for which precedence must be defined

src/dialect/mssql.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ impl Dialect for MsSqlDialect {
6363
}
6464

6565
fn supports_connect_by(&self) -> bool {
66-
true
66+
false
6767
}
6868

6969
fn supports_eq_alias_assignment(&self) -> bool {
@@ -119,6 +119,10 @@ impl Dialect for MsSqlDialect {
119119
true
120120
}
121121

122+
fn supports_statements_without_semicolon_delimiter(&self) -> bool {
123+
true
124+
}
125+
122126
fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
123127
!keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw) && !RESERVED_FOR_COLUMN_ALIAS.contains(kw)
124128
}
@@ -271,6 +275,9 @@ impl MsSqlDialect {
271275
) -> Result<Vec<Statement>, ParserError> {
272276
let mut stmts = Vec::new();
273277
loop {
278+
while let Token::SemiColon = parser.peek_token_ref().token {
279+
parser.advance_token();
280+
}
274281
if let Token::EOF = parser.peek_token_ref().token {
275282
break;
276283
}

src/keywords.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
10621062
Keyword::ANTI,
10631063
Keyword::SEMI,
10641064
Keyword::RETURNING,
1065+
Keyword::RETURN,
10651066
Keyword::ASOF,
10661067
Keyword::MATCH_CONDITION,
10671068
// for MSSQL-specific OUTER APPLY (seems reserved in most dialects)
@@ -1115,6 +1116,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
11151116
Keyword::CLUSTER,
11161117
Keyword::DISTRIBUTE,
11171118
Keyword::RETURNING,
1119+
Keyword::RETURN,
11181120
// Reserved only as a column alias in the `SELECT` clause
11191121
Keyword::FROM,
11201122
Keyword::INTO,
@@ -1129,6 +1131,7 @@ pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[
11291131
Keyword::LIMIT,
11301132
Keyword::HAVING,
11311133
Keyword::WHERE,
1134+
Keyword::RETURN,
11321135
];
11331136

11341137
/// Global list of reserved keywords that cannot be parsed as identifiers
@@ -1139,4 +1142,5 @@ pub const RESERVED_FOR_IDENTIFIER: &[Keyword] = &[
11391142
Keyword::INTERVAL,
11401143
Keyword::STRUCT,
11411144
Keyword::TRIM,
1145+
Keyword::RETURN,
11421146
];

src/parser/mod.rs

Lines changed: 62 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -222,13 +222,17 @@ pub struct ParserOptions {
222222
/// Controls how literal values are unescaped. See
223223
/// [`Tokenizer::with_unescape`] for more details.
224224
pub unescape: bool,
225+
/// Determines if the parser requires a semicolon at the end of every statement.
226+
/// (Default: true)
227+
pub require_semicolon_statement_delimiter: bool,
225228
}
226229

227230
impl Default for ParserOptions {
228231
fn default() -> Self {
229232
Self {
230233
trailing_commas: false,
231234
unescape: true,
235+
require_semicolon_statement_delimiter: true,
232236
}
233237
}
234238
}
@@ -261,6 +265,22 @@ impl ParserOptions {
261265
self.unescape = unescape;
262266
self
263267
}
268+
269+
/// Set if semicolon statement delimiters are required.
270+
///
271+
/// If this option is `true`, the following SQL will not parse. If the option is `false`, the SQL will parse.
272+
///
273+
/// ```sql
274+
/// SELECT 1
275+
/// SELECT 2
276+
/// ```
277+
pub fn with_require_semicolon_statement_delimiter(
278+
mut self,
279+
require_semicolon_statement_delimiter: bool,
280+
) -> Self {
281+
self.require_semicolon_statement_delimiter = require_semicolon_statement_delimiter;
282+
self
283+
}
264284
}
265285

266286
#[derive(Copy, Clone)]
@@ -351,7 +371,11 @@ impl<'a> Parser<'a> {
351371
state: ParserState::Normal,
352372
dialect,
353373
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
354-
options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
374+
options: ParserOptions::new()
375+
.with_trailing_commas(dialect.supports_trailing_commas())
376+
.with_require_semicolon_statement_delimiter(
377+
!dialect.supports_statements_without_semicolon_delimiter(),
378+
),
355379
}
356380
}
357381

@@ -470,10 +494,10 @@ impl<'a> Parser<'a> {
470494
match self.peek_token().token {
471495
Token::EOF => break,
472496

473-
// end of statement
474-
Token::Word(word) => {
475-
if expecting_statement_delimiter && word.keyword == Keyword::END {
476-
break;
497+
// don't expect a semicolon statement delimiter after a newline when not otherwise required
498+
Token::Whitespace(Whitespace::Newline) => {
499+
if !self.options.require_semicolon_statement_delimiter {
500+
expecting_statement_delimiter = false;
477501
}
478502
}
479503
_ => {}
@@ -485,7 +509,7 @@ impl<'a> Parser<'a> {
485509

486510
let statement = self.parse_statement()?;
487511
stmts.push(statement);
488-
expecting_statement_delimiter = true;
512+
expecting_statement_delimiter = self.options.require_semicolon_statement_delimiter;
489513
}
490514
Ok(stmts)
491515
}
@@ -4513,6 +4537,9 @@ impl<'a> Parser<'a> {
45134537
) -> Result<Vec<Statement>, ParserError> {
45144538
let mut values = vec![];
45154539
loop {
4540+
// ignore empty statements (between successive statement delimiters)
4541+
while self.consume_token(&Token::SemiColon) {}
4542+
45164543
match &self.peek_nth_token_ref(0).token {
45174544
Token::EOF => break,
45184545
Token::Word(w) => {
@@ -4524,7 +4551,13 @@ impl<'a> Parser<'a> {
45244551
}
45254552

45264553
values.push(self.parse_statement()?);
4527-
self.expect_token(&Token::SemiColon)?;
4554+
4555+
if self.options.require_semicolon_statement_delimiter {
4556+
self.expect_token(&Token::SemiColon)?;
4557+
}
4558+
4559+
// ignore empty statements (between successive statement delimiters)
4560+
while self.consume_token(&Token::SemiColon) {}
45284561
}
45294562
Ok(values)
45304563
}
@@ -15639,7 +15672,28 @@ impl<'a> Parser<'a> {
1563915672

1564015673
/// Parse [Statement::Return]
1564115674
fn parse_return(&mut self) -> Result<Statement, ParserError> {
15642-
match self.maybe_parse(|p| p.parse_expr())? {
15675+
let rs = self.maybe_parse(|p| {
15676+
let expr = p.parse_expr()?;
15677+
15678+
match &expr {
15679+
Expr::Value(_)
15680+
| Expr::Function(_)
15681+
| Expr::UnaryOp { .. }
15682+
| Expr::BinaryOp { .. }
15683+
| Expr::Case { .. }
15684+
| Expr::Cast { .. }
15685+
| Expr::Convert { .. }
15686+
| Expr::Subquery(_) => Ok(expr),
15687+
// todo: how to restrict to variables?
15688+
Expr::Identifier(id) if id.value.starts_with('@') => Ok(expr),
15689+
_ => parser_err!(
15690+
"Non-returnable expression found following RETURN",
15691+
p.peek_token().span.start
15692+
),
15693+
}
15694+
})?;
15695+
15696+
match rs {
1564315697
Some(expr) => Ok(Statement::Return(ReturnStatement {
1564415698
value: Some(ReturnStatementValue::Expr(expr)),
1564515699
})),

src/test_utils.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,37 @@ impl TestedDialects {
186186
statements
187187
}
188188

189+
/// The same as [`statements_parse_to`] but it will strip semicolons from the SQL text.
190+
pub fn statements_without_semicolons_parse_to(
191+
&self,
192+
sql: &str,
193+
canonical: &str,
194+
) -> Vec<Statement> {
195+
let sql_without_semicolons = sql
196+
.replace("; ", " ")
197+
.replace(" ;", " ")
198+
.replace(";\n", "\n")
199+
.replace("\n;", "\n")
200+
.replace(";", " ");
201+
let statements = self
202+
.parse_sql_statements(&sql_without_semicolons)
203+
.expect(&sql_without_semicolons);
204+
if !canonical.is_empty() && sql != canonical {
205+
assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements);
206+
} else {
207+
assert_eq!(
208+
sql,
209+
statements
210+
.iter()
211+
// note: account for format_statement_list manually inserted semicolons
212+
.map(|s| s.to_string().trim_end_matches(";").to_string())
213+
.collect::<Vec<_>>()
214+
.join("; ")
215+
);
216+
}
217+
statements
218+
}
219+
189220
/// Ensures that `sql` parses as an [`Expr`], and that
190221
/// re-serializing the parse result produces canonical
191222
pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr {
@@ -313,6 +344,43 @@ where
313344
all_dialects_where(|d| !except(d))
314345
}
315346

347+
/// Returns all dialects that don't support statements without semicolon delimiters.
348+
/// (i.e. dialects that require semicolon delimiters.)
349+
pub fn all_dialects_requiring_semicolon_statement_delimiter() -> TestedDialects {
350+
let tested_dialects =
351+
all_dialects_except(|d| d.supports_statements_without_semicolon_delimiter());
352+
assert_ne!(tested_dialects.dialects.len(), 0);
353+
tested_dialects
354+
}
355+
356+
/// Returns all dialects that do support statements without semicolon delimiters.
357+
/// (i.e. dialects not requiring semicolon delimiters.)
358+
pub fn all_dialects_not_requiring_semicolon_statement_delimiter() -> TestedDialects {
359+
let tested_dialects =
360+
all_dialects_where(|d| d.supports_statements_without_semicolon_delimiter());
361+
assert_ne!(tested_dialects.dialects.len(), 0);
362+
tested_dialects
363+
}
364+
365+
/// Asserts an error for `parse_sql_statements`:
366+
/// - "end of statement" for dialects that require semicolon delimiters
367+
/// - "an SQL statement" for dialects that don't require semicolon delimiters.
368+
pub fn assert_err_parse_statements(sql: &str, found: &str) {
369+
assert_eq!(
370+
ParserError::ParserError(format!("Expected: end of statement, found: {}", found)),
371+
all_dialects_requiring_semicolon_statement_delimiter()
372+
.parse_sql_statements(sql)
373+
.unwrap_err()
374+
);
375+
376+
assert_eq!(
377+
ParserError::ParserError(format!("Expected: an SQL statement, found: {}", found)),
378+
all_dialects_not_requiring_semicolon_statement_delimiter()
379+
.parse_sql_statements(sql)
380+
.unwrap_err()
381+
);
382+
}
383+
316384
pub fn assert_eq_vec<T: ToString>(expected: &[&str], actual: &[T]) {
317385
assert_eq!(
318386
expected,

0 commit comments

Comments
 (0)