Skip to content

Commit 4b11bab

Browse files
committed
Parse Snowflake COPY INTO <location>
1 parent e9498d5 commit 4b11bab

File tree

5 files changed

+281
-73
lines changed

5 files changed

+281
-73
lines changed

src/ast/helpers/stmt_data_loading.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ pub enum DataLoadingOptionType {
5858
STRING,
5959
BOOLEAN,
6060
ENUM,
61+
NUMBER,
6162
}
6263

6364
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -128,12 +129,9 @@ impl fmt::Display for DataLoadingOption {
128129
DataLoadingOptionType::STRING => {
129130
write!(f, "{}='{}'", self.option_name, self.value)?;
130131
}
131-
DataLoadingOptionType::ENUM => {
132-
// single quote is omitted
133-
write!(f, "{}={}", self.option_name, self.value)?;
134-
}
135-
DataLoadingOptionType::BOOLEAN => {
136-
// single quote is omitted
132+
DataLoadingOptionType::ENUM
133+
| DataLoadingOptionType::BOOLEAN
134+
| DataLoadingOptionType::NUMBER => {
137135
write!(f, "{}={}", self.option_name, self.value)?;
138136
}
139137
}

src/ast/mod.rs

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2461,14 +2461,14 @@ pub enum Statement {
24612461
values: Vec<Option<String>>,
24622462
},
24632463
/// ```sql
2464-
/// COPY INTO
2464+
/// COPY INTO <table>
24652465
/// ```
24662466
/// See <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table>
24672467
/// Copy Into syntax available for Snowflake is different than the one implemented in
24682468
/// Postgres. Although they share common prefix, it is reasonable to implement them
24692469
/// in different enums. This can be refactored later once custom dialects
24702470
/// are allowed to have custom Statements.
2471-
CopyIntoSnowflake {
2471+
CopyIntoSnowflakeTable {
24722472
into: ObjectName,
24732473
from_stage: ObjectName,
24742474
from_stage_alias: Option<Ident>,
@@ -2481,6 +2481,19 @@ pub enum Statement {
24812481
validation_mode: Option<String>,
24822482
},
24832483
/// ```sql
2484+
/// COPY INTO <location>
2485+
/// ```
2486+
/// See <https://docs.snowflake.com/en/sql-reference/sql/copy-into-location>
2487+
CopyIntoSnowflakeLocation {
2488+
into: ObjectName,
2489+
from_table: Option<ObjectName>,
2490+
from_query: Option<Box<Query>>,
2491+
stage_params: StageParamsObject,
2492+
partition: Option<Expr>,
2493+
file_format: DataLoadingOptions,
2494+
copy_options: DataLoadingOptions,
2495+
},
2496+
/// ```sql
24842497
/// CLOSE
24852498
/// ```
24862499
/// Closes the portal underlying an open cursor.
@@ -4951,7 +4964,7 @@ impl fmt::Display for Statement {
49514964
}
49524965
Ok(())
49534966
}
4954-
Statement::CopyIntoSnowflake {
4967+
Statement::CopyIntoSnowflakeTable {
49554968
into,
49564969
from_stage,
49574970
from_stage_alias,
@@ -5009,6 +5022,33 @@ impl fmt::Display for Statement {
50095022
}
50105023
Ok(())
50115024
}
5025+
Statement::CopyIntoSnowflakeLocation {
5026+
into,
5027+
from_table,
5028+
from_query,
5029+
stage_params,
5030+
partition,
5031+
file_format,
5032+
copy_options,
5033+
} => {
5034+
write!(f, "COPY INTO {into} FROM")?;
5035+
if let Some(from_table) = from_table {
5036+
write!(f, " {from_table}")?;
5037+
} else if let Some(from_query) = from_query {
5038+
write!(f, " ({from_query})")?;
5039+
}
5040+
write!(f, "{stage_params}")?;
5041+
if let Some(partition) = partition {
5042+
write!(f, " PARTITION BY {partition}")?;
5043+
}
5044+
if !file_format.options.is_empty() {
5045+
write!(f, " FILE_FORMAT=({})", file_format)?;
5046+
}
5047+
if !copy_options.options.is_empty() {
5048+
write!(f, " {}", copy_options)?;
5049+
}
5050+
Ok(())
5051+
}
50125052
Statement::CreateType {
50135053
name,
50145054
representation,

src/ast/spans.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ impl Spanned for Statement {
330330
legacy_options: _,
331331
values: _,
332332
} => source.span(),
333-
Statement::CopyIntoSnowflake {
333+
Statement::CopyIntoSnowflakeTable {
334334
into: _,
335335
from_stage: _,
336336
from_stage_alias: _,
@@ -342,6 +342,15 @@ impl Spanned for Statement {
342342
copy_options: _,
343343
validation_mode: _,
344344
} => Span::empty(),
345+
Statement::CopyIntoSnowflakeLocation {
346+
into: _,
347+
from_table: _,
348+
from_query: _,
349+
stage_params: _,
350+
partition: _,
351+
file_format: _,
352+
copy_options: _,
353+
} => Span::empty(),
345354
Statement::Close { cursor } => match cursor {
346355
CloseCursor::All => Span::empty(),
347356
CloseCursor::Specific { name } => name.span,

src/dialect/snowflake.rs

Lines changed: 118 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use crate::ast::{
3030
use crate::dialect::{Dialect, Precedence};
3131
use crate::keywords::Keyword;
3232
use crate::parser::{Parser, ParserError};
33-
use crate::tokenizer::Token;
33+
use crate::tokenizer::{Token, Word};
3434
#[cfg(not(feature = "std"))]
3535
use alloc::string::String;
3636
#[cfg(not(feature = "std"))]
@@ -606,7 +606,82 @@ pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result<ObjectName, Par
606606
}
607607
}
608608

609+
/// Parses a `COPY INTO` statement. Snowflake has two variants, `COPY INTO <table>`
610+
/// and `COPY INTO <location>` which have different syntax.
609611
pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
612+
if is_copy_into_location(parser) {
613+
parse_copy_into_location(parser)
614+
} else {
615+
parse_copy_into_table(parser)
616+
}
617+
}
618+
619+
/// Returns true if the `COPY INTO` statement is a `COPY INTO <location>`
620+
/// by peeking at the prefix of the target's object name and trying to
621+
/// determine if it's a Snowflake stage or a table.
622+
fn is_copy_into_location(parser: &mut Parser) -> bool {
623+
match parser.peek_token().token {
624+
// Indicates an internal stage
625+
Token::AtSign => true,
626+
// Indicates an external stage, i.e. s3://, gcs:// or azure://
627+
Token::SingleQuotedString(s) if s.contains("://") => true,
628+
_ => false,
629+
}
630+
}
631+
632+
fn parse_copy_into_location(parser: &mut Parser) -> Result<Statement, ParserError> {
633+
let into: ObjectName = parse_snowflake_stage_name(parser)?;
634+
parser.expect_keyword_is(Keyword::FROM)?;
635+
// Two options: `FROM (query)` or `FROM <table>`
636+
let (from_table, from_query) = match parser.next_token().token {
637+
Token::LParen => {
638+
let query = parser.parse_query()?;
639+
parser.expect_token(&Token::RParen)?;
640+
(None, Some(query))
641+
}
642+
_ => {
643+
parser.prev_token();
644+
let table = parser.parse_object_name(true)?;
645+
(Some(table), None)
646+
}
647+
};
648+
let stage_params = parse_stage_params(parser)?;
649+
650+
// The order of the next options is not defined, so we need to loop
651+
// until we reach the end of the statement
652+
let mut partition = None;
653+
let mut file_format = Vec::new();
654+
let mut options: Vec<DataLoadingOption> = Vec::new();
655+
loop {
656+
if parser.parse_keyword(Keyword::FILE_FORMAT) {
657+
parser.expect_token(&Token::Eq)?;
658+
file_format = parse_parentheses_options(parser)?;
659+
} else if parser.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) {
660+
partition = Some(parser.parse_expr()?)
661+
} else {
662+
match parser.next_token().token {
663+
Token::SemiColon | Token::EOF => break,
664+
Token::Comma => continue,
665+
Token::Word(key) => options.push(parse_copy_option(parser, key)?),
666+
_ => return parser.expected("another option, ; or EOF'", parser.peek_token()),
667+
}
668+
}
669+
}
670+
671+
Ok(Statement::CopyIntoSnowflakeLocation {
672+
into,
673+
from_table,
674+
from_query,
675+
stage_params,
676+
partition,
677+
file_format: DataLoadingOptions {
678+
options: file_format,
679+
},
680+
copy_options: DataLoadingOptions { options },
681+
})
682+
}
683+
684+
fn parse_copy_into_table(parser: &mut Parser) -> Result<Statement, ParserError> {
610685
let into: ObjectName = parse_snowflake_stage_name(parser)?;
611686
let mut files: Vec<String> = vec![];
612687
let mut from_transformations: Option<Vec<StageLoadSelectItem>> = None;
@@ -707,7 +782,7 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
707782
validation_mode = Some(parser.next_token().token.to_string());
708783
}
709784

710-
Ok(Statement::CopyIntoSnowflake {
785+
Ok(Statement::CopyIntoSnowflakeTable {
711786
into,
712787
from_stage,
713788
from_stage_alias,
@@ -871,55 +946,55 @@ fn parse_stage_params(parser: &mut Parser) -> Result<StageParamsObject, ParserEr
871946
///
872947
fn parse_parentheses_options(parser: &mut Parser) -> Result<Vec<DataLoadingOption>, ParserError> {
873948
let mut options: Vec<DataLoadingOption> = Vec::new();
874-
875949
parser.expect_token(&Token::LParen)?;
876950
loop {
877951
match parser.next_token().token {
878952
Token::RParen => break,
879-
Token::Word(key) => {
880-
parser.expect_token(&Token::Eq)?;
881-
if parser.parse_keyword(Keyword::TRUE) {
882-
options.push(DataLoadingOption {
883-
option_name: key.value,
884-
option_type: DataLoadingOptionType::BOOLEAN,
885-
value: "TRUE".to_string(),
886-
});
887-
Ok(())
888-
} else if parser.parse_keyword(Keyword::FALSE) {
889-
options.push(DataLoadingOption {
890-
option_name: key.value,
891-
option_type: DataLoadingOptionType::BOOLEAN,
892-
value: "FALSE".to_string(),
893-
});
894-
Ok(())
895-
} else {
896-
match parser.next_token().token {
897-
Token::SingleQuotedString(value) => {
898-
options.push(DataLoadingOption {
899-
option_name: key.value,
900-
option_type: DataLoadingOptionType::STRING,
901-
value,
902-
});
903-
Ok(())
904-
}
905-
Token::Word(word) => {
906-
options.push(DataLoadingOption {
907-
option_name: key.value,
908-
option_type: DataLoadingOptionType::ENUM,
909-
value: word.value,
910-
});
911-
Ok(())
912-
}
913-
_ => parser.expected("expected option value", parser.peek_token()),
914-
}
915-
}
916-
}
917-
_ => parser.expected("another option or ')'", parser.peek_token()),
918-
}?;
953+
Token::Comma => continue,
954+
Token::Word(key) => options.push(parse_copy_option(parser, key)?),
955+
_ => return parser.expected("another option or ')'", parser.peek_token()),
956+
};
919957
}
920958
Ok(options)
921959
}
922960

961+
/// Parses a `KEY = VALUE` construct based on the specified key
962+
fn parse_copy_option(parser: &mut Parser, key: Word) -> Result<DataLoadingOption, ParserError> {
963+
parser.expect_token(&Token::Eq)?;
964+
if parser.parse_keyword(Keyword::TRUE) {
965+
Ok(DataLoadingOption {
966+
option_name: key.value,
967+
option_type: DataLoadingOptionType::BOOLEAN,
968+
value: "TRUE".to_string(),
969+
})
970+
} else if parser.parse_keyword(Keyword::FALSE) {
971+
Ok(DataLoadingOption {
972+
option_name: key.value,
973+
option_type: DataLoadingOptionType::BOOLEAN,
974+
value: "FALSE".to_string(),
975+
})
976+
} else {
977+
match parser.next_token().token {
978+
Token::SingleQuotedString(value) => Ok(DataLoadingOption {
979+
option_name: key.value,
980+
option_type: DataLoadingOptionType::STRING,
981+
value,
982+
}),
983+
Token::Word(word) => Ok(DataLoadingOption {
984+
option_name: key.value,
985+
option_type: DataLoadingOptionType::ENUM,
986+
value: word.value,
987+
}),
988+
Token::Number(n, _) => Ok(DataLoadingOption {
989+
option_name: key.value,
990+
option_type: DataLoadingOptionType::NUMBER,
991+
value: n,
992+
}),
993+
_ => parser.expected("expected option value", parser.peek_token()),
994+
}
995+
}
996+
}
997+
923998
/// Parsing a property of identity or autoincrement column option
924999
/// Syntax:
9251000
/// ```sql

0 commit comments

Comments
 (0)