Skip to content

Commit fa4dd4d

Browse files
committed
Add support for Databricks JSON path syntax
1 parent 54d6e6e commit fa4dd4d

File tree

9 files changed

+188
-12
lines changed

9 files changed

+188
-12
lines changed

examples/cli.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ $ cargo run --example cli - [--dialectname]
4848

4949
let dialect: Box<dyn Dialect> = match std::env::args().nth(2).unwrap_or_default().as_ref() {
5050
"--ansi" => Box::new(AnsiDialect {}),
51+
"--databricks" => Box::new(DatabricksDialect {}),
5152
"--bigquery" => Box::new(BigQueryDialect {}),
5253
"--postgres" => Box::new(PostgreSqlDialect {}),
5354
"--ms" => Box::new(MsSqlDialect {}),

src/ast/mod.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -527,8 +527,16 @@ pub enum JsonPathElem {
527527
/// Accesses an object field or array element using bracket notation,
528528
/// e.g. `obj['foo']`.
529529
///
530+
/// Note that on Databricks this is *not* equivalent to dot notation:
531+
/// bracket notation is case-sensitive, whereas dot notation is case-insensitive.
532+
///
530533
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#bracket-notation>.
531534
Bracket { key: Expr },
535+
/// Accesses all elements of the preceding value (generally an array). Used
536+
/// for constructs like `foo:bar[*].baz`.
537+
///
538+
/// See <https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-json-path-expression#extract-values-from-arrays>
539+
AllElements,
532540
}
533541

534542
/// A JSON path.
@@ -539,17 +547,22 @@ pub enum JsonPathElem {
539547
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
540548
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
541549
pub struct JsonPath {
550+
/// True if the path should start with a colon. Some dialects (e.g. Snowflake) allow
551+
/// `a['b']`, whereas others (e.g. Databricks) require the colon even in this case
552+
/// (so `a:['b']`).
553+
pub has_colon: bool,
542554
pub path: Vec<JsonPathElem>,
543555
}
544556

545557
impl fmt::Display for JsonPath {
546558
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
559+
if self.has_colon {
560+
write!(f, ":")?;
561+
}
547562
for (i, elem) in self.path.iter().enumerate() {
548563
match elem {
549564
JsonPathElem::Dot { key, quoted } => {
550-
if i == 0 {
551-
write!(f, ":")?;
552-
} else {
565+
if i != 0 {
553566
write!(f, ".")?;
554567
}
555568

@@ -562,6 +575,9 @@ impl fmt::Display for JsonPath {
562575
JsonPathElem::Bracket { key } => {
563576
write!(f, "[{key}]")?;
564577
}
578+
JsonPathElem::AllElements => {
579+
write!(f, "[*]")?;
580+
}
565581
}
566582
}
567583
Ok(())

src/ast/spans.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1747,7 +1747,7 @@ impl Spanned for FunctionArgumentClause {
17471747
/// see Spanned impl for JsonPathElem for more information
17481748
impl Spanned for JsonPath {
17491749
fn span(&self) -> Span {
1750-
let JsonPath { path } = self;
1750+
let JsonPath { path, has_colon: _ } = self;
17511751

17521752
union_spans(path.iter().map(|i| i.span()))
17531753
}
@@ -1757,11 +1757,13 @@ impl Spanned for JsonPath {
17571757
///
17581758
/// Missing spans:
17591759
/// - [JsonPathElem::Dot]
1760+
/// - [JsonPathElem::AllElements]
17601761
impl Spanned for JsonPathElem {
17611762
fn span(&self) -> Span {
17621763
match self {
17631764
JsonPathElem::Dot { .. } => Span::empty(),
17641765
JsonPathElem::Bracket { key } => key.span(),
1766+
JsonPathElem::AllElements => Span::empty(),
17651767
}
17661768
}
17671769
}

src/dialect/databricks.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::dialect::Dialect;
18+
use crate::dialect::{Dialect, Precedence};
19+
use crate::parser::{Parser, ParserError};
20+
use crate::tokenizer::Token;
1921

2022
/// A [`Dialect`] for [Databricks SQL](https://www.databricks.com/)
2123
///
@@ -38,6 +40,19 @@ impl Dialect for DatabricksDialect {
3840
matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
3941
}
4042

43+
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
44+
let token = parser.peek_token();
45+
// : is used for JSON path access
46+
match token.token {
47+
Token::Colon => Some(Ok(self.prec_value(Precedence::Period))),
48+
_ => None,
49+
}
50+
}
51+
52+
fn supports_semi_structured_array_all_elements(&self) -> bool {
53+
true
54+
}
55+
4156
fn supports_filter_during_aggregation(&self) -> bool {
4257
true
4358
}

src/dialect/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -892,6 +892,11 @@ pub trait Dialect: Debug + Any {
892892
false
893893
}
894894

895+
/// Returns true if the dialect supports writing `[*]` to select all elements in a JSON array.
896+
fn supports_semi_structured_array_all_elements(&self) -> bool {
897+
false
898+
}
899+
895900
/// Returns true if the specified keyword is reserved and cannot be
896901
/// used as an identifier without special handling like quoting.
897902
fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {

src/parser/mod.rs

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3632,7 +3632,8 @@ impl<'a> Parser<'a> {
36323632
expr: Box::new(expr),
36333633
})
36343634
} else if Token::LBracket == *tok && self.dialect.supports_partiql()
3635-
|| (dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == *tok)
3635+
|| (dialect_of!(self is SnowflakeDialect | GenericDialect | DatabricksDialect)
3636+
&& Token::Colon == *tok)
36363637
{
36373638
self.prev_token();
36383639
self.parse_json_access(expr)
@@ -3779,21 +3780,26 @@ impl<'a> Parser<'a> {
37793780
})
37803781
}
37813782

3783+
// On entry, the next token is either a `:` or the `[` that opens a bracket expression.
37823784
fn parse_json_path(&mut self) -> Result<JsonPath, ParserError> {
37833785
let mut path = Vec::new();
3786+
let mut has_colon = false;
37843787
loop {
37853788
match self.next_token().token {
37863789
Token::Colon if path.is_empty() => {
3787-
path.push(self.parse_json_path_object_key()?);
3790+
has_colon = true;
3791+
if *self.peek_token_ref() == Token::LBracket {
3792+
path.push(self.parse_json_path_bracket_element()?);
3793+
} else {
3794+
path.push(self.parse_json_path_object_key()?);
3795+
}
37883796
}
37893797
Token::Period if !path.is_empty() => {
37903798
path.push(self.parse_json_path_object_key()?);
37913799
}
37923800
Token::LBracket => {
3793-
let key = self.parse_expr()?;
3794-
self.expect_token(&Token::RBracket)?;
3795-
3796-
path.push(JsonPathElem::Bracket { key });
3801+
self.prev_token();
3802+
path.push(self.parse_json_path_bracket_element()?);
37973803
}
37983804
_ => {
37993805
self.prev_token();
@@ -3803,7 +3809,23 @@ impl<'a> Parser<'a> {
38033809
}
38043810

38053811
debug_assert!(!path.is_empty());
3806-
Ok(JsonPath { path })
3812+
Ok(JsonPath { has_colon, path })
3813+
}
3814+
3815+
/// Parses a single bracketed element in a JSON path expression, including both brackets.
3816+
fn parse_json_path_bracket_element(&mut self) -> Result<JsonPathElem, ParserError> {
3817+
self.expect_token(&Token::LBracket)?;
3818+
let elem = if *self.peek_token_ref() == Token::Mul
3819+
&& self.dialect.supports_semi_structured_array_all_elements()
3820+
{
3821+
self.expect_token(&Token::Mul)?;
3822+
JsonPathElem::AllElements
3823+
} else {
3824+
let key = self.parse_expr()?;
3825+
JsonPathElem::Bracket { key }
3826+
};
3827+
self.expect_token(&Token::RBracket)?;
3828+
Ok(elem)
38073829
}
38083830

38093831
/// Parses the parens following the `[ NOT ] IN` operator.

tests/sqlparser_databricks.rs

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,3 +360,100 @@ fn data_type_timestamp_ntz() {
360360
s => panic!("Unexpected statement: {:?}", s),
361361
}
362362
}
363+
364+
#[test]
365+
fn parse_semi_structured_data_traversal() {
366+
// basic case
367+
let sql = "SELECT a:b.c FROM t";
368+
let select = databricks().verified_only_select(sql);
369+
assert_eq!(
370+
SelectItem::UnnamedExpr(Expr::JsonAccess {
371+
value: Box::new(Expr::Identifier(Ident::new("a"))),
372+
path: JsonPath {
373+
has_colon: true,
374+
path: vec![
375+
JsonPathElem::Dot {
376+
key: "b".to_owned(),
377+
quoted: false
378+
},
379+
JsonPathElem::Dot {
380+
key: "c".to_owned(),
381+
quoted: false
382+
}
383+
]
384+
},
385+
}),
386+
select.projection[0]
387+
);
388+
389+
// brackets
390+
let sql = "SELECT a:b['c'][0] FROM t";
391+
let select = databricks().verified_only_select(sql);
392+
assert_eq!(
393+
SelectItem::UnnamedExpr(Expr::JsonAccess {
394+
value: Box::new(Expr::Identifier(Ident::new("a"))),
395+
path: JsonPath {
396+
has_colon: true,
397+
path: vec![
398+
JsonPathElem::Dot {
399+
key: "b".to_owned(),
400+
quoted: false
401+
},
402+
JsonPathElem::Bracket {
403+
key: Expr::value(Value::SingleQuotedString("c".to_owned()))
404+
},
405+
JsonPathElem::Bracket {
406+
key: Expr::value(number("0"))
407+
}
408+
]
409+
},
410+
}),
411+
select.projection[0]
412+
);
413+
414+
// asterisk for arrays
415+
let sql = "SELECT a:['b'].c FROM t";
416+
let select = databricks().verified_only_select(sql);
417+
assert_eq!(
418+
SelectItem::UnnamedExpr(Expr::JsonAccess {
419+
value: Box::new(Expr::Identifier(Ident::new("a"))),
420+
path: JsonPath {
421+
has_colon: true,
422+
path: vec![
423+
JsonPathElem::Bracket {
424+
key: Expr::value(Value::SingleQuotedString("b".to_owned())),
425+
},
426+
JsonPathElem::Dot {
427+
key: "c".to_owned(),
428+
quoted: false
429+
}
430+
]
431+
},
432+
}),
433+
select.projection[0]
434+
);
435+
436+
// asterisk for arrays
437+
let sql = "SELECT a:b[*].c FROM t";
438+
let select = databricks().verified_only_select(sql);
439+
assert_eq!(
440+
SelectItem::UnnamedExpr(Expr::JsonAccess {
441+
value: Box::new(Expr::Identifier(Ident::new("a"))),
442+
path: JsonPath {
443+
has_colon: true,
444+
path: vec![
445+
JsonPathElem::Dot {
446+
key: "b".to_owned(),
447+
quoted: false
448+
},
449+
JsonPathElem::AllElements,
450+
JsonPathElem::Dot {
451+
key: "c".to_owned(),
452+
quoted: false
453+
}
454+
]
455+
},
456+
}),
457+
select.projection[0]
458+
);
459+
}

tests/sqlparser_redshift.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ fn test_redshift_json_path() {
206206
Ident::new("c_orders")
207207
])),
208208
path: JsonPath {
209+
has_colon: false,
209210
path: vec![
210211
JsonPathElem::Bracket {
211212
key: Expr::value(number("0"))
@@ -229,6 +230,7 @@ fn test_redshift_json_path() {
229230
Ident::new("c_orders")
230231
])),
231232
path: JsonPath {
233+
has_colon: false,
232234
path: vec![
233235
JsonPathElem::Bracket {
234236
key: Expr::value(number("0"))
@@ -255,6 +257,7 @@ fn test_redshift_json_path() {
255257
Ident::new("col1")
256258
])),
257259
path: JsonPath {
260+
has_colon: false,
258261
path: vec![
259262
JsonPathElem::Bracket {
260263
key: Expr::value(number("0"))
@@ -281,6 +284,7 @@ fn test_redshift_json_path() {
281284
Ident::new("col1")
282285
])),
283286
path: JsonPath {
287+
has_colon: false,
284288
path: vec![
285289
JsonPathElem::Bracket {
286290
key: Expr::value(number("0"))
@@ -308,6 +312,7 @@ fn test_parse_json_path_from() {
308312
assert_eq!(
309313
json_path,
310314
&Some(JsonPath {
315+
has_colon: false,
311316
path: vec![
312317
JsonPathElem::Bracket {
313318
key: Expr::value(number("0"))
@@ -332,6 +337,7 @@ fn test_parse_json_path_from() {
332337
assert_eq!(
333338
json_path,
334339
&Some(JsonPath {
340+
has_colon: false,
335341
path: vec![
336342
JsonPathElem::Bracket {
337343
key: Expr::value(number("0"))

0 commit comments

Comments
 (0)