Skip to content

Commit a93ba22

Browse files
authored
Merge branch 'apache:main' into main
2 parents 7fa413c + 724a1d1 commit a93ba22

File tree

9 files changed

+484
-11
lines changed

9 files changed

+484
-11
lines changed

src/ast/mod.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,23 @@ pub enum Expr {
808808
},
809809
/// Scalar function call e.g. `LEFT(foo, 5)`
810810
Function(Function),
811+
/// Arbitrary expr method call
812+
///
813+
/// Syntax:
814+
///
815+
/// `<arbitrary-expr>.<function-call>.<function-call-expr>...`
816+
///
817+
/// > `arbitrary-expr` can be any expression including a function call.
818+
///
819+
/// Example:
820+
///
821+
/// ```sql
822+
/// SELECT (SELECT ',' + name FROM sys.objects FOR XML PATH(''), TYPE).value('.','NVARCHAR(MAX)')
823+
/// SELECT CONVERT(XML,'<Book>abc</Book>').value('.','NVARCHAR(MAX)').value('.','NVARCHAR(MAX)')
824+
/// ```
825+
///
826+
/// (mssql): <https://learn.microsoft.com/en-us/sql/t-sql/xml/xml-data-type-methods?view=sql-server-ver16>
827+
Method(Method),
811828
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
812829
///
813830
/// Note we only recognize a complete single expression as `<condition>`,
@@ -1464,6 +1481,7 @@ impl fmt::Display for Expr {
14641481
write!(f, " '{}'", &value::escape_single_quote_string(value))
14651482
}
14661483
Expr::Function(fun) => write!(f, "{fun}"),
1484+
Expr::Method(method) => write!(f, "{method}"),
14671485
Expr::Case {
14681486
operand,
14691487
conditions,
@@ -3329,6 +3347,22 @@ pub enum Statement {
33293347
channel: Ident,
33303348
payload: Option<String>,
33313349
},
3350+
/// ```sql
3351+
/// LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename
3352+
/// [PARTITION (partcol1=val1, partcol2=val2 ...)]
3353+
/// [INPUTFORMAT 'inputformat' SERDE 'serde']
3354+
/// ```
3355+
/// Loading files into tables
3356+
///
3357+
/// See Hive <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362036#LanguageManualDML-Loadingfilesintotables>
3358+
LoadData {
3359+
local: bool,
3360+
inpath: String,
3361+
overwrite: bool,
3362+
table_name: ObjectName,
3363+
partitioned: Option<Vec<Expr>>,
3364+
table_format: Option<HiveLoadDataFormat>,
3365+
},
33323366
}
33333367

33343368
impl fmt::Display for Statement {
@@ -3931,6 +3965,36 @@ impl fmt::Display for Statement {
39313965
Ok(())
39323966
}
39333967
Statement::CreateTable(create_table) => create_table.fmt(f),
3968+
Statement::LoadData {
    local,
    inpath,
    overwrite,
    table_name,
    partitioned,
    table_format,
} => {
    // Renders:
    //   LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE <name>
    //   [PARTITION (...)] [INPUTFORMAT <expr> SERDE <expr>]
    //
    // The path is emitted inside single quotes, so embedded quotes are
    // escaped the same way other string literals in this file are;
    // otherwise a path containing `'` would not round-trip.
    write!(
        f,
        "LOAD DATA {local}INPATH '{inpath}' {overwrite}INTO TABLE {table_name}",
        local = if *local { "LOCAL " } else { "" },
        inpath = value::escape_single_quote_string(inpath),
        overwrite = if *overwrite { "OVERWRITE " } else { "" },
    )?;
    // An empty partition list is omitted entirely rather than printed as
    // `PARTITION ()`.
    if let Some(parts) = partitioned {
        if !parts.is_empty() {
            write!(f, " PARTITION ({})", display_comma_separated(parts))?;
        }
    }
    if let Some(HiveLoadDataFormat {
        serde,
        input_format,
    }) = table_format
    {
        write!(f, " INPUTFORMAT {input_format} SERDE {serde}")?;
    }
    Ok(())
}
39343998
Statement::CreateVirtualTable {
39353999
name,
39364000
if_not_exists,
@@ -5609,6 +5673,27 @@ impl fmt::Display for FunctionArgumentClause {
56095673
}
56105674
}
56115675

5676+
/// A method call: a receiver expression followed by one or more chained
/// function calls, displayed as `<expr>.<call>.<call>...`.
5677+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
5678+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
5679+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
5680+
pub struct Method {
5681+
/// The expression the first call in the chain is applied to.
pub expr: Box<Expr>,
5682+
/// The chained calls, in source order. Always non-empty.
5683+
pub method_chain: Vec<Function>,
5684+
}
5685+
5686+
impl fmt::Display for Method {
5687+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
5688+
write!(
5689+
f,
5690+
"{}.{}",
5691+
self.expr,
5692+
display_separated(&self.method_chain, ".")
5693+
)
5694+
}
5695+
}
5696+
56125697
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
56135698
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
56145699
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
@@ -5816,6 +5901,14 @@ pub enum HiveRowFormat {
58165901
DELIMITED { delimiters: Vec<HiveRowDelimiter> },
58175902
}
58185903

5904+
/// The optional `INPUTFORMAT <input_format> SERDE <serde>` clause of a
/// `LOAD DATA` statement.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
5905+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
5906+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
5907+
pub struct HiveLoadDataFormat {
5908+
/// The expression following the `SERDE` keyword.
pub serde: Expr,
5909+
/// The expression following the `INPUTFORMAT` keyword.
pub input_format: Expr,
5910+
}
5911+
58195912
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
58205913
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
58215914
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]

src/dialect/duckdb.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,9 @@ impl Dialect for DuckDbDialect {
6666
fn supports_explain_with_utility_options(&self) -> bool {
6767
true
6868
}
69+
70+
/// Enables the `LOAD extension` statement for this dialect.
///
/// See DuckDB <https://duckdb.org/docs/sql/statements/load_and_install.html#load>
71+
fn supports_load_extension(&self) -> bool {
72+
true
73+
}
6974
}

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,4 +115,8 @@ impl Dialect for GenericDialect {
115115
fn supports_comment_on(&self) -> bool {
116116
true
117117
}
118+
119+
/// Enables the `LOAD extension` statement for the generic dialect.
fn supports_load_extension(&self) -> bool {
120+
true
121+
}
118122
}

src/dialect/hive.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,9 @@ impl Dialect for HiveDialect {
5656
fn supports_bang_not_operator(&self) -> bool {
5757
true
5858
}
59+
60+
/// Enables the `LOAD DATA` statement for this dialect.
///
/// See Hive <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362036#LanguageManualDML-Loadingfilesintotables>
61+
fn supports_load_data(&self) -> bool {
62+
true
63+
}
5964
}

src/dialect/mod.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,15 @@ pub trait Dialect: Debug + Any {
279279
false
280280
}
281281

282+
/// Returns true if the dialect supports method calls, for example:
283+
///
284+
/// ```sql
285+
/// SELECT (SELECT ',' + name FROM sys.objects FOR XML PATH(''), TYPE).value('.','NVARCHAR(MAX)')
286+
/// ```
287+
///
/// The default implementation returns `false`.
fn supports_methods(&self) -> bool {
288+
false
289+
}
290+
282291
/// Returns true if the dialect supports multiple variable assignment
283292
/// using parentheses in a `SET` variable declaration.
284293
///
@@ -611,6 +620,16 @@ pub trait Dialect: Debug + Any {
611620
false
612621
}
613622

623+
/// Returns true if the dialect supports the `LOAD DATA` statement.
///
/// The default implementation returns `false`.
624+
fn supports_load_data(&self) -> bool {
625+
false
626+
}
627+
628+
/// Returns true if the dialect supports the `LOAD extension` statement,
/// i.e. `LOAD` followed by an extension name.
///
/// The default implementation returns `false`.
629+
fn supports_load_extension(&self) -> bool {
630+
false
631+
}
632+
614633
/// Returns true if this dialect expects the `TOP` option
615634
/// before the `ALL`/`DISTINCT` options in a `SELECT` statement.
616635
fn supports_top_before_distinct(&self) -> bool {

src/dialect/mssql.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,8 @@ impl Dialect for MsSqlDialect {
6262
fn supports_boolean_literals(&self) -> bool {
6363
false
6464
}
65+
66+
/// Enables method-call expressions (`<expr>.<function-call>...`) for this
/// dialect.
fn supports_methods(&self) -> bool {
66+
true
67+
}
6569
}

src/keywords.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ define_keywords!(
389389
INITIALLY,
390390
INNER,
391391
INOUT,
392+
INPATH,
392393
INPUT,
393394
INPUTFORMAT,
394395
INSENSITIVE,

src/parser/mod.rs

Lines changed: 84 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -543,10 +543,7 @@ impl<'a> Parser<'a> {
543543
Keyword::INSTALL if dialect_of!(self is DuckDbDialect | GenericDialect) => {
544544
self.parse_install()
545545
}
546-
// `LOAD` is duckdb specific https://duckdb.org/docs/extensions/overview
547-
Keyword::LOAD if dialect_of!(self is DuckDbDialect | GenericDialect) => {
548-
self.parse_load()
549-
}
546+
Keyword::LOAD => self.parse_load(),
550547
// `OPTIMIZE` is clickhouse specific https://clickhouse.tech/docs/en/sql-reference/statements/optimize/
551548
Keyword::OPTIMIZE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
552549
self.parse_optimize_table()
@@ -1317,6 +1314,7 @@ impl<'a> Parser<'a> {
13171314
}
13181315
};
13191316
self.expect_token(&Token::RParen)?;
1317+
let expr = self.try_parse_method(expr)?;
13201318
if !self.consume_token(&Token::Period) {
13211319
Ok(expr)
13221320
} else {
@@ -1346,6 +1344,9 @@ impl<'a> Parser<'a> {
13461344
}
13471345
_ => self.expected("an expression", next_token),
13481346
}?;
1347+
1348+
let expr = self.try_parse_method(expr)?;
1349+
13491350
if self.parse_keyword(Keyword::COLLATE) {
13501351
Ok(Expr::Collate {
13511352
expr: Box::new(expr),
@@ -1403,6 +1404,41 @@ impl<'a> Parser<'a> {
14031404
})
14041405
}
14051406

1407+
/// Parses method call expression
1408+
fn try_parse_method(&mut self, expr: Expr) -> Result<Expr, ParserError> {
1409+
if !self.dialect.supports_methods() {
1410+
return Ok(expr);
1411+
}
1412+
let method_chain = self.maybe_parse(|p| {
1413+
let mut method_chain = Vec::new();
1414+
while p.consume_token(&Token::Period) {
1415+
let tok = p.next_token();
1416+
let name = match tok.token {
1417+
Token::Word(word) => word.to_ident(),
1418+
_ => return p.expected("identifier", tok),
1419+
};
1420+
let func = match p.parse_function(ObjectName(vec![name]))? {
1421+
Expr::Function(func) => func,
1422+
_ => return p.expected("function", p.peek_token()),
1423+
};
1424+
method_chain.push(func);
1425+
}
1426+
if !method_chain.is_empty() {
1427+
Ok(method_chain)
1428+
} else {
1429+
p.expected("function", p.peek_token())
1430+
}
1431+
})?;
1432+
if let Some(method_chain) = method_chain {
1433+
Ok(Expr::Method(Method {
1434+
expr: Box::new(expr),
1435+
method_chain,
1436+
}))
1437+
} else {
1438+
Ok(expr)
1439+
}
1440+
}
1441+
14061442
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
14071443
self.expect_token(&Token::LParen)?;
14081444

@@ -11201,6 +11237,22 @@ impl<'a> Parser<'a> {
1120111237
}
1120211238
}
1120311239

11240+
pub fn parse_load_data_table_format(
11241+
&mut self,
11242+
) -> Result<Option<HiveLoadDataFormat>, ParserError> {
11243+
if self.parse_keyword(Keyword::INPUTFORMAT) {
11244+
let input_format = self.parse_expr()?;
11245+
self.expect_keyword(Keyword::SERDE)?;
11246+
let serde = self.parse_expr()?;
11247+
Ok(Some(HiveLoadDataFormat {
11248+
input_format,
11249+
serde,
11250+
}))
11251+
} else {
11252+
Ok(None)
11253+
}
11254+
}
11255+
1120411256
/// Parse an UPDATE statement, returning a `Box`ed SetExpr
1120511257
///
1120611258
/// This is used to reduce the size of the stack frames in debug builds
@@ -12203,10 +12255,35 @@ impl<'a> Parser<'a> {
1220312255
Ok(Statement::Install { extension_name })
1220412256
}
1220512257

12206-
/// `LOAD [extension_name]`
12258+
/// Parse a SQL LOAD statement
1220712259
pub fn parse_load(&mut self) -> Result<Statement, ParserError> {
12208-
let extension_name = self.parse_identifier(false)?;
12209-
Ok(Statement::Load { extension_name })
12260+
if self.dialect.supports_load_extension() {
12261+
let extension_name = self.parse_identifier(false)?;
12262+
Ok(Statement::Load { extension_name })
12263+
} else if self.parse_keyword(Keyword::DATA) && self.dialect.supports_load_data() {
12264+
let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some();
12265+
self.expect_keyword(Keyword::INPATH)?;
12266+
let inpath = self.parse_literal_string()?;
12267+
let overwrite = self.parse_one_of_keywords(&[Keyword::OVERWRITE]).is_some();
12268+
self.expect_keyword(Keyword::INTO)?;
12269+
self.expect_keyword(Keyword::TABLE)?;
12270+
let table_name = self.parse_object_name(false)?;
12271+
let partitioned = self.parse_insert_partition()?;
12272+
let table_format = self.parse_load_data_table_format()?;
12273+
Ok(Statement::LoadData {
12274+
local,
12275+
inpath,
12276+
overwrite,
12277+
table_name,
12278+
partitioned,
12279+
table_format,
12280+
})
12281+
} else {
12282+
self.expected(
12283+
"`DATA` or an extension name after `LOAD`",
12284+
self.peek_token(),
12285+
)
12286+
}
1221012287
}
1221112288

1221212289
/// ```sql

0 commit comments

Comments
 (0)