Skip to content

Commit b3b2f3d

Browse files
committed
Parse Snowflake COPY INTO <location>
1 parent 5da702f commit b3b2f3d

File tree

5 files changed

+276
-73
lines changed

5 files changed

+276
-73
lines changed

src/ast/helpers/stmt_data_loading.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ pub enum DataLoadingOptionType {
5858
STRING,
5959
BOOLEAN,
6060
ENUM,
61+
NUMBER,
6162
}
6263

6364
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -128,12 +129,9 @@ impl fmt::Display for DataLoadingOption {
128129
DataLoadingOptionType::STRING => {
129130
write!(f, "{}='{}'", self.option_name, self.value)?;
130131
}
131-
DataLoadingOptionType::ENUM => {
132-
// single quote is omitted
133-
write!(f, "{}={}", self.option_name, self.value)?;
134-
}
135-
DataLoadingOptionType::BOOLEAN => {
136-
// single quote is omitted
132+
DataLoadingOptionType::ENUM
133+
| DataLoadingOptionType::BOOLEAN
134+
| DataLoadingOptionType::NUMBER => {
137135
write!(f, "{}={}", self.option_name, self.value)?;
138136
}
139137
}

src/ast/mod.rs

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2461,14 +2461,14 @@ pub enum Statement {
24612461
values: Vec<Option<String>>,
24622462
},
24632463
/// ```sql
2464-
/// COPY INTO
2464+
/// COPY INTO <table>
24652465
/// ```
24662466
/// See <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table>
24672467
/// Copy Into syntax available for Snowflake is different than the one implemented in
24682468
/// Postgres. Although they share a common prefix, it is reasonable to implement them
24692469
/// in different enums. This can be refactored later once custom dialects
24702470
/// are allowed to have custom Statements.
2471-
CopyIntoSnowflake {
2471+
CopyIntoSnowflakeTable {
24722472
into: ObjectName,
24732473
from_stage: ObjectName,
24742474
from_stage_alias: Option<Ident>,
@@ -2481,6 +2481,19 @@ pub enum Statement {
24812481
validation_mode: Option<String>,
24822482
},
24832483
/// ```sql
2484+
/// COPY INTO <location>
2485+
/// ```
2486+
/// See <https://docs.snowflake.com/en/sql-reference/sql/copy-into-location>
2487+
CopyIntoSnowflakeLocation {
2488+
into: ObjectName,
2489+
from_table: Option<ObjectName>,
2490+
from_query: Option<Box<Query>>,
2491+
stage_params: StageParamsObject,
2492+
partition: Option<Expr>,
2493+
file_format: DataLoadingOptions,
2494+
copy_options: DataLoadingOptions,
2495+
},
2496+
/// ```sql
24842497
/// CLOSE
24852498
/// ```
24862499
/// Closes the portal underlying an open cursor.
@@ -4951,7 +4964,7 @@ impl fmt::Display for Statement {
49514964
}
49524965
Ok(())
49534966
}
4954-
Statement::CopyIntoSnowflake {
4967+
Statement::CopyIntoSnowflakeTable {
49554968
into,
49564969
from_stage,
49574970
from_stage_alias,
@@ -5009,6 +5022,33 @@ impl fmt::Display for Statement {
50095022
}
50105023
Ok(())
50115024
}
5025+
Statement::CopyIntoSnowflakeLocation {
5026+
into,
5027+
from_table,
5028+
from_query,
5029+
stage_params,
5030+
partition,
5031+
file_format,
5032+
copy_options,
5033+
} => {
5034+
write!(f, "COPY INTO {into} FROM")?;
5035+
if let Some(from_table) = from_table {
5036+
write!(f, " {from_table}")?;
5037+
} else if let Some(from_query) = from_query {
5038+
write!(f, " ({from_query})")?;
5039+
}
5040+
write!(f, "{stage_params}")?;
5041+
if let Some(partition) = partition {
5042+
write!(f, " PARTITION BY {partition}")?;
5043+
}
5044+
if !file_format.options.is_empty() {
5045+
write!(f, " FILE_FORMAT=({})", file_format)?;
5046+
}
5047+
if !copy_options.options.is_empty() {
5048+
write!(f, " {}", copy_options)?;
5049+
}
5050+
Ok(())
5051+
}
50125052
Statement::CreateType {
50135053
name,
50145054
representation,

src/ast/spans.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ impl Spanned for Statement {
330330
legacy_options: _,
331331
values: _,
332332
} => source.span(),
333-
Statement::CopyIntoSnowflake {
333+
Statement::CopyIntoSnowflakeTable {
334334
into: _,
335335
from_stage: _,
336336
from_stage_alias: _,
@@ -342,6 +342,15 @@ impl Spanned for Statement {
342342
copy_options: _,
343343
validation_mode: _,
344344
} => Span::empty(),
345+
Statement::CopyIntoSnowflakeLocation {
346+
into: _,
347+
from_table: _,
348+
from_query: _,
349+
stage_params: _,
350+
partition: _,
351+
file_format: _,
352+
copy_options: _,
353+
} => Span::empty(),
345354
Statement::Close { cursor } => match cursor {
346355
CloseCursor::All => Span::empty(),
347356
CloseCursor::Specific { name } => name.span,

src/dialect/snowflake.rs

Lines changed: 118 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use crate::ast::{
3030
use crate::dialect::{Dialect, Precedence};
3131
use crate::keywords::Keyword;
3232
use crate::parser::{Parser, ParserError};
33-
use crate::tokenizer::Token;
33+
use crate::tokenizer::{Token, Word};
3434
#[cfg(not(feature = "std"))]
3535
use alloc::string::String;
3636
#[cfg(not(feature = "std"))]
@@ -611,7 +611,82 @@ pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result<ObjectName, Par
611611
}
612612
}
613613

614+
/// Parses a `COPY INTO` statement. Snowflake has two variants, `COPY INTO <table>`
615+
/// and `COPY INTO <location>` which have different syntax.
614616
pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
617+
if is_copy_into_location(parser) {
618+
parse_copy_into_location(parser)
619+
} else {
620+
parse_copy_into_table(parser)
621+
}
622+
}
623+
624+
/// Returns true if the `COPY INTO` statement is a `COPY INTO <location>`
625+
/// by peeking at the prefix of the target's object name and trying to
626+
/// determine if it's a Snowflake stage or a table.
627+
fn is_copy_into_location(parser: &mut Parser) -> bool {
628+
match parser.peek_token().token {
629+
// Indicates an internal stage
630+
Token::AtSign => true,
631+
// Indicates an external stage, i.e. s3://, gcs:// or azure://
632+
Token::SingleQuotedString(s) if s.contains("://") => true,
633+
_ => false,
634+
}
635+
}
636+
637+
fn parse_copy_into_location(parser: &mut Parser) -> Result<Statement, ParserError> {
638+
let into: ObjectName = parse_snowflake_stage_name(parser)?;
639+
parser.expect_keyword_is(Keyword::FROM)?;
640+
// Two options: `FROM (query)` or `FROM <table>`
641+
let (from_table, from_query) = match parser.next_token().token {
642+
Token::LParen => {
643+
let query = parser.parse_query()?;
644+
parser.expect_token(&Token::RParen)?;
645+
(None, Some(query))
646+
}
647+
_ => {
648+
parser.prev_token();
649+
let table = parser.parse_object_name(true)?;
650+
(Some(table), None)
651+
}
652+
};
653+
let stage_params = parse_stage_params(parser)?;
654+
655+
// The order of the next options is not defined, so we need to loop
656+
// until we reach the end of the statement
657+
let mut partition = None;
658+
let mut file_format = Vec::new();
659+
let mut options: Vec<DataLoadingOption> = Vec::new();
660+
loop {
661+
if parser.parse_keyword(Keyword::FILE_FORMAT) {
662+
parser.expect_token(&Token::Eq)?;
663+
file_format = parse_parentheses_options(parser)?;
664+
} else if parser.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) {
665+
partition = Some(parser.parse_expr()?)
666+
} else {
667+
match parser.next_token().token {
668+
Token::SemiColon | Token::EOF => break,
669+
Token::Comma => continue,
670+
Token::Word(key) => options.push(parse_copy_option(parser, key)?),
671+
_ => return parser.expected("another option, ; or EOF'", parser.peek_token()),
672+
}
673+
}
674+
}
675+
676+
Ok(Statement::CopyIntoSnowflakeLocation {
677+
into,
678+
from_table,
679+
from_query,
680+
stage_params,
681+
partition,
682+
file_format: DataLoadingOptions {
683+
options: file_format,
684+
},
685+
copy_options: DataLoadingOptions { options },
686+
})
687+
}
688+
689+
fn parse_copy_into_table(parser: &mut Parser) -> Result<Statement, ParserError> {
615690
let into: ObjectName = parse_snowflake_stage_name(parser)?;
616691
let mut files: Vec<String> = vec![];
617692
let mut from_transformations: Option<Vec<StageLoadSelectItem>> = None;
@@ -712,7 +787,7 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
712787
validation_mode = Some(parser.next_token().token.to_string());
713788
}
714789

715-
Ok(Statement::CopyIntoSnowflake {
790+
Ok(Statement::CopyIntoSnowflakeTable {
716791
into,
717792
from_stage,
718793
from_stage_alias,
@@ -876,55 +951,55 @@ fn parse_stage_params(parser: &mut Parser) -> Result<StageParamsObject, ParserEr
876951
///
877952
fn parse_parentheses_options(parser: &mut Parser) -> Result<Vec<DataLoadingOption>, ParserError> {
878953
let mut options: Vec<DataLoadingOption> = Vec::new();
879-
880954
parser.expect_token(&Token::LParen)?;
881955
loop {
882956
match parser.next_token().token {
883957
Token::RParen => break,
884-
Token::Word(key) => {
885-
parser.expect_token(&Token::Eq)?;
886-
if parser.parse_keyword(Keyword::TRUE) {
887-
options.push(DataLoadingOption {
888-
option_name: key.value,
889-
option_type: DataLoadingOptionType::BOOLEAN,
890-
value: "TRUE".to_string(),
891-
});
892-
Ok(())
893-
} else if parser.parse_keyword(Keyword::FALSE) {
894-
options.push(DataLoadingOption {
895-
option_name: key.value,
896-
option_type: DataLoadingOptionType::BOOLEAN,
897-
value: "FALSE".to_string(),
898-
});
899-
Ok(())
900-
} else {
901-
match parser.next_token().token {
902-
Token::SingleQuotedString(value) => {
903-
options.push(DataLoadingOption {
904-
option_name: key.value,
905-
option_type: DataLoadingOptionType::STRING,
906-
value,
907-
});
908-
Ok(())
909-
}
910-
Token::Word(word) => {
911-
options.push(DataLoadingOption {
912-
option_name: key.value,
913-
option_type: DataLoadingOptionType::ENUM,
914-
value: word.value,
915-
});
916-
Ok(())
917-
}
918-
_ => parser.expected("expected option value", parser.peek_token()),
919-
}
920-
}
921-
}
922-
_ => parser.expected("another option or ')'", parser.peek_token()),
923-
}?;
958+
Token::Comma => continue,
959+
Token::Word(key) => options.push(parse_copy_option(parser, key)?),
960+
_ => return parser.expected("another option or ')'", parser.peek_token()),
961+
};
924962
}
925963
Ok(options)
926964
}
927965

966+
/// Parses a `KEY = VALUE` construct based on the specified key
967+
fn parse_copy_option(parser: &mut Parser, key: Word) -> Result<DataLoadingOption, ParserError> {
968+
parser.expect_token(&Token::Eq)?;
969+
if parser.parse_keyword(Keyword::TRUE) {
970+
Ok(DataLoadingOption {
971+
option_name: key.value,
972+
option_type: DataLoadingOptionType::BOOLEAN,
973+
value: "TRUE".to_string(),
974+
})
975+
} else if parser.parse_keyword(Keyword::FALSE) {
976+
Ok(DataLoadingOption {
977+
option_name: key.value,
978+
option_type: DataLoadingOptionType::BOOLEAN,
979+
value: "FALSE".to_string(),
980+
})
981+
} else {
982+
match parser.next_token().token {
983+
Token::SingleQuotedString(value) => Ok(DataLoadingOption {
984+
option_name: key.value,
985+
option_type: DataLoadingOptionType::STRING,
986+
value,
987+
}),
988+
Token::Word(word) => Ok(DataLoadingOption {
989+
option_name: key.value,
990+
option_type: DataLoadingOptionType::ENUM,
991+
value: word.value,
992+
}),
993+
Token::Number(n, _) => Ok(DataLoadingOption {
994+
option_name: key.value,
995+
option_type: DataLoadingOptionType::NUMBER,
996+
value: n,
997+
}),
998+
_ => parser.expected("expected option value", parser.peek_token()),
999+
}
1000+
}
1001+
}
1002+
9281003
/// Parsing a property of identity or autoincrement column option
9291004
/// Syntax:
9301005
/// ```sql

0 commit comments

Comments
 (0)