Skip to content

Commit b12f8bb

Browse files
committed
Parse Snowflake COPY INTO <location>
1 parent ef072be commit b12f8bb

File tree

5 files changed

+276
-73
lines changed

5 files changed

+276
-73
lines changed

src/ast/helpers/stmt_data_loading.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ pub enum DataLoadingOptionType {
5858
STRING,
5959
BOOLEAN,
6060
ENUM,
61+
NUMBER,
6162
}
6263

6364
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -128,12 +129,9 @@ impl fmt::Display for DataLoadingOption {
128129
DataLoadingOptionType::STRING => {
129130
write!(f, "{}='{}'", self.option_name, self.value)?;
130131
}
131-
DataLoadingOptionType::ENUM => {
132-
// single quote is omitted
133-
write!(f, "{}={}", self.option_name, self.value)?;
134-
}
135-
DataLoadingOptionType::BOOLEAN => {
136-
// single quote is omitted
132+
DataLoadingOptionType::ENUM
133+
| DataLoadingOptionType::BOOLEAN
134+
| DataLoadingOptionType::NUMBER => {
137135
write!(f, "{}={}", self.option_name, self.value)?;
138136
}
139137
}

src/ast/mod.rs

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2461,14 +2461,14 @@ pub enum Statement {
24612461
values: Vec<Option<String>>,
24622462
},
24632463
/// ```sql
2464-
/// COPY INTO
2464+
/// COPY INTO <table>
24652465
/// ```
24662466
/// See <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table>
24672467
/// Copy Into syntax available for Snowflake is different than the one implemented in
24682468
/// Postgres. Although they share a common prefix, it is reasonable to implement them
24692469
/// in different enums. This can be refactored later once custom dialects
24702470
/// are allowed to have custom Statements.
2471-
CopyIntoSnowflake {
2471+
CopyIntoSnowflakeTable {
24722472
into: ObjectName,
24732473
from_stage: ObjectName,
24742474
from_stage_alias: Option<Ident>,
@@ -2481,6 +2481,19 @@ pub enum Statement {
24812481
validation_mode: Option<String>,
24822482
},
24832483
/// ```sql
2484+
/// COPY INTO <location>
2485+
/// ```
2486+
/// See <https://docs.snowflake.com/en/sql-reference/sql/copy-into-location>
2487+
CopyIntoSnowflakeLocation {
2488+
into: ObjectName,
2489+
from_table: Option<ObjectName>,
2490+
from_query: Option<Box<Query>>,
2491+
stage_params: StageParamsObject,
2492+
partition: Option<Expr>,
2493+
file_format: DataLoadingOptions,
2494+
copy_options: DataLoadingOptions,
2495+
},
2496+
/// ```sql
24842497
/// CLOSE
24852498
/// ```
24862499
/// Closes the portal underlying an open cursor.
@@ -4951,7 +4964,7 @@ impl fmt::Display for Statement {
49514964
}
49524965
Ok(())
49534966
}
4954-
Statement::CopyIntoSnowflake {
4967+
Statement::CopyIntoSnowflakeTable {
49554968
into,
49564969
from_stage,
49574970
from_stage_alias,
@@ -5009,6 +5022,33 @@ impl fmt::Display for Statement {
50095022
}
50105023
Ok(())
50115024
}
5025+
Statement::CopyIntoSnowflakeLocation {
5026+
into,
5027+
from_table,
5028+
from_query,
5029+
stage_params,
5030+
partition,
5031+
file_format,
5032+
copy_options,
5033+
} => {
5034+
write!(f, "COPY INTO {into} FROM")?;
5035+
if let Some(from_table) = from_table {
5036+
write!(f, " {from_table}")?;
5037+
} else if let Some(from_query) = from_query {
5038+
write!(f, " ({from_query})")?;
5039+
}
5040+
write!(f, "{stage_params}")?;
5041+
if let Some(partition) = partition {
5042+
write!(f, " PARTITION BY {partition}")?;
5043+
}
5044+
if !file_format.options.is_empty() {
5045+
write!(f, " FILE_FORMAT=({})", file_format)?;
5046+
}
5047+
if !copy_options.options.is_empty() {
5048+
write!(f, " {}", copy_options)?;
5049+
}
5050+
Ok(())
5051+
}
50125052
Statement::CreateType {
50135053
name,
50145054
representation,

src/ast/spans.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ impl Spanned for Statement {
330330
legacy_options: _,
331331
values: _,
332332
} => source.span(),
333-
Statement::CopyIntoSnowflake {
333+
Statement::CopyIntoSnowflakeTable {
334334
into: _,
335335
from_stage: _,
336336
from_stage_alias: _,
@@ -342,6 +342,15 @@ impl Spanned for Statement {
342342
copy_options: _,
343343
validation_mode: _,
344344
} => Span::empty(),
345+
Statement::CopyIntoSnowflakeLocation {
346+
into: _,
347+
from_table: _,
348+
from_query: _,
349+
stage_params: _,
350+
partition: _,
351+
file_format: _,
352+
copy_options: _,
353+
} => Span::empty(),
345354
Statement::Close { cursor } => match cursor {
346355
CloseCursor::All => Span::empty(),
347356
CloseCursor::Specific { name } => name.span,

src/dialect/snowflake.rs

Lines changed: 118 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use crate::ast::{
3030
use crate::dialect::{Dialect, Precedence};
3131
use crate::keywords::Keyword;
3232
use crate::parser::{Parser, ParserError};
33-
use crate::tokenizer::Token;
33+
use crate::tokenizer::{Token, Word};
3434
#[cfg(not(feature = "std"))]
3535
use alloc::string::String;
3636
#[cfg(not(feature = "std"))]
@@ -660,7 +660,82 @@ pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result<ObjectName, Par
660660
}
661661
}
662662

663+
/// Parses a `COPY INTO` statement. Snowflake has two variants, `COPY INTO <table>`
664+
/// and `COPY INTO <location>`, which have different syntax.
663665
pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
666+
if is_copy_into_location(parser) {
667+
parse_copy_into_location(parser)
668+
} else {
669+
parse_copy_into_table(parser)
670+
}
671+
}
672+
673+
/// Returns true if the `COPY INTO` statement is a `COPY INTO <location>`
674+
/// by peeking at the prefix of the target's object name and trying to
675+
/// determine if it's a Snowflake stage or a table.
676+
fn is_copy_into_location(parser: &mut Parser) -> bool {
677+
match parser.peek_token().token {
678+
// Indicates an internal stage
679+
Token::AtSign => true,
680+
// Indicates an external stage, i.e. s3://, gcs:// or azure://
681+
Token::SingleQuotedString(s) if s.contains("://") => true,
682+
_ => false,
683+
}
684+
}
685+
686+
fn parse_copy_into_location(parser: &mut Parser) -> Result<Statement, ParserError> {
687+
let into: ObjectName = parse_snowflake_stage_name(parser)?;
688+
parser.expect_keyword_is(Keyword::FROM)?;
689+
// Two options: `FROM (query)` or `FROM <table>`
690+
let (from_table, from_query) = match parser.next_token().token {
691+
Token::LParen => {
692+
let query = parser.parse_query()?;
693+
parser.expect_token(&Token::RParen)?;
694+
(None, Some(query))
695+
}
696+
_ => {
697+
parser.prev_token();
698+
let table = parser.parse_object_name(true)?;
699+
(Some(table), None)
700+
}
701+
};
702+
let stage_params = parse_stage_params(parser)?;
703+
704+
// The order of the next options is not defined, so we need to loop
705+
// until we reach the end of the statement
706+
let mut partition = None;
707+
let mut file_format = Vec::new();
708+
let mut options: Vec<DataLoadingOption> = Vec::new();
709+
loop {
710+
if parser.parse_keyword(Keyword::FILE_FORMAT) {
711+
parser.expect_token(&Token::Eq)?;
712+
file_format = parse_parentheses_options(parser)?;
713+
} else if parser.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) {
714+
partition = Some(parser.parse_expr()?)
715+
} else {
716+
match parser.next_token().token {
717+
Token::SemiColon | Token::EOF => break,
718+
Token::Comma => continue,
719+
Token::Word(key) => options.push(parse_copy_option(parser, key)?),
720+
_ => return parser.expected("another option, ; or EOF'", parser.peek_token()),
721+
}
722+
}
723+
}
724+
725+
Ok(Statement::CopyIntoSnowflakeLocation {
726+
into,
727+
from_table,
728+
from_query,
729+
stage_params,
730+
partition,
731+
file_format: DataLoadingOptions {
732+
options: file_format,
733+
},
734+
copy_options: DataLoadingOptions { options },
735+
})
736+
}
737+
738+
fn parse_copy_into_table(parser: &mut Parser) -> Result<Statement, ParserError> {
664739
let into: ObjectName = parse_snowflake_stage_name(parser)?;
665740
let mut files: Vec<String> = vec![];
666741
let mut from_transformations: Option<Vec<StageLoadSelectItem>> = None;
@@ -761,7 +836,7 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
761836
validation_mode = Some(parser.next_token().token.to_string());
762837
}
763838

764-
Ok(Statement::CopyIntoSnowflake {
839+
Ok(Statement::CopyIntoSnowflakeTable {
765840
into,
766841
from_stage,
767842
from_stage_alias,
@@ -925,55 +1000,55 @@ fn parse_stage_params(parser: &mut Parser) -> Result<StageParamsObject, ParserEr
9251000
///
9261001
fn parse_parentheses_options(parser: &mut Parser) -> Result<Vec<DataLoadingOption>, ParserError> {
9271002
let mut options: Vec<DataLoadingOption> = Vec::new();
928-
9291003
parser.expect_token(&Token::LParen)?;
9301004
loop {
9311005
match parser.next_token().token {
9321006
Token::RParen => break,
933-
Token::Word(key) => {
934-
parser.expect_token(&Token::Eq)?;
935-
if parser.parse_keyword(Keyword::TRUE) {
936-
options.push(DataLoadingOption {
937-
option_name: key.value,
938-
option_type: DataLoadingOptionType::BOOLEAN,
939-
value: "TRUE".to_string(),
940-
});
941-
Ok(())
942-
} else if parser.parse_keyword(Keyword::FALSE) {
943-
options.push(DataLoadingOption {
944-
option_name: key.value,
945-
option_type: DataLoadingOptionType::BOOLEAN,
946-
value: "FALSE".to_string(),
947-
});
948-
Ok(())
949-
} else {
950-
match parser.next_token().token {
951-
Token::SingleQuotedString(value) => {
952-
options.push(DataLoadingOption {
953-
option_name: key.value,
954-
option_type: DataLoadingOptionType::STRING,
955-
value,
956-
});
957-
Ok(())
958-
}
959-
Token::Word(word) => {
960-
options.push(DataLoadingOption {
961-
option_name: key.value,
962-
option_type: DataLoadingOptionType::ENUM,
963-
value: word.value,
964-
});
965-
Ok(())
966-
}
967-
_ => parser.expected("expected option value", parser.peek_token()),
968-
}
969-
}
970-
}
971-
_ => parser.expected("another option or ')'", parser.peek_token()),
972-
}?;
1007+
Token::Comma => continue,
1008+
Token::Word(key) => options.push(parse_copy_option(parser, key)?),
1009+
_ => return parser.expected("another option or ')'", parser.peek_token()),
1010+
};
9731011
}
9741012
Ok(options)
9751013
}
9761014

1015+
/// Parses a `KEY = VALUE` construct based on the specified key
1016+
fn parse_copy_option(parser: &mut Parser, key: Word) -> Result<DataLoadingOption, ParserError> {
1017+
parser.expect_token(&Token::Eq)?;
1018+
if parser.parse_keyword(Keyword::TRUE) {
1019+
Ok(DataLoadingOption {
1020+
option_name: key.value,
1021+
option_type: DataLoadingOptionType::BOOLEAN,
1022+
value: "TRUE".to_string(),
1023+
})
1024+
} else if parser.parse_keyword(Keyword::FALSE) {
1025+
Ok(DataLoadingOption {
1026+
option_name: key.value,
1027+
option_type: DataLoadingOptionType::BOOLEAN,
1028+
value: "FALSE".to_string(),
1029+
})
1030+
} else {
1031+
match parser.next_token().token {
1032+
Token::SingleQuotedString(value) => Ok(DataLoadingOption {
1033+
option_name: key.value,
1034+
option_type: DataLoadingOptionType::STRING,
1035+
value,
1036+
}),
1037+
Token::Word(word) => Ok(DataLoadingOption {
1038+
option_name: key.value,
1039+
option_type: DataLoadingOptionType::ENUM,
1040+
value: word.value,
1041+
}),
1042+
Token::Number(n, _) => Ok(DataLoadingOption {
1043+
option_name: key.value,
1044+
option_type: DataLoadingOptionType::NUMBER,
1045+
value: n,
1046+
}),
1047+
_ => parser.expected("expected option value", parser.peek_token()),
1048+
}
1049+
}
1050+
}
1051+
9771052
/// Parsing a property of identity or autoincrement column option
9781053
/// Syntax:
9791054
/// ```sql

0 commit comments

Comments
 (0)