Skip to content

Distinguish between CHAR VARYING, CHARACTER VARYING, and CHARACTER data types, differentiating them from VARCHAR and CHAR #648

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions src/ast/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,14 @@ use super::value::escape_single_quote_string;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DataType {
/// Fixed-length character type e.g. CHAR(10)
/// Fixed-length character type e.g. CHARACTER(10)
Character(Option<u64>),
/// Fixed-length char type e.g. CHAR(10)
Char(Option<u64>),
/// Character varying type e.g. CHARACTER VARYING(10)
CharacterVarying(Option<u64>),
/// Char varying type e.g. CHAR VARYING(10)
CharVarying(Option<u64>),
/// Variable-length character type e.g. VARCHAR(10)
Varchar(Option<u64>),
/// Variable-length character type e.g. NVARCHAR(10)
Expand Down Expand Up @@ -127,10 +133,17 @@ pub enum DataType {
impl fmt::Display for DataType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DataType::Character(size) => {
format_type_with_optional_length(f, "CHARACTER", size, false)
}
DataType::Char(size) => format_type_with_optional_length(f, "CHAR", size, false),
DataType::Varchar(size) => {
DataType::CharacterVarying(size) => {
format_type_with_optional_length(f, "CHARACTER VARYING", size, false)
}
DataType::CharVarying(size) => {
format_type_with_optional_length(f, "CHAR VARYING", size, false)
}
DataType::Varchar(size) => format_type_with_optional_length(f, "VARCHAR", size, false),
DataType::Nvarchar(size) => {
format_type_with_optional_length(f, "NVARCHAR", size, false)
}
Expand Down
161 changes: 88 additions & 73 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3414,9 +3414,16 @@ impl<'a> Parser<'a> {
}
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_precision()?)),
Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)),
Keyword::CHAR | Keyword::CHARACTER => {
Keyword::CHARACTER => {
if self.parse_keyword(Keyword::VARYING) {
Ok(DataType::Varchar(self.parse_optional_precision()?))
Ok(DataType::CharacterVarying(self.parse_optional_precision()?))
} else {
Ok(DataType::Character(self.parse_optional_precision()?))
}
}
Keyword::CHAR => {
if self.parse_keyword(Keyword::VARYING) {
Ok(DataType::CharVarying(self.parse_optional_precision()?))
} else {
Ok(DataType::Char(self.parse_optional_precision()?))
}
Expand Down Expand Up @@ -5288,80 +5295,88 @@ mod tests {
});
}

// TODO add tests for all data types? https://github.com/sqlparser-rs/sqlparser-rs/issues/2
// TODO when we have dialect validation by data type parsing, split test
#[test]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems like we have lost coverage after the rebase (e.g. the new tests for TIME WITHOUT TIMEZONE for example)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alamb that's expected, as I mentioned in my comment above.

I'll add tests along the week, trying to reach 100% coverage for data type parsing. But for now, I'd like to restrict this to the ones I added if possible

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why this PR would remove coverage for types that were added in other PRs. I am fine with expanding coverage in future PRs

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. It was because I (dumbly) did the structure for tests twice, as it didn't please me. I'll re-add the coverage for those data types, and for now leave as "additional data types" for those that aren't covered by ANSII

fn test_parse_data_type() {
// BINARY data type
test_parse_data_type("BINARY", DataType::Binary(None), "BINARY");
test_parse_data_type("BINARY(20)", DataType::Binary(Some(20)), "BINARY(20)");

// BLOB data type
test_parse_data_type("BLOB", DataType::Blob(None), "BLOB");
test_parse_data_type("BLOB(50)", DataType::Blob(Some(50)), "BLOB(50)");

// CLOB data type
test_parse_data_type("CLOB", DataType::Clob(None), "CLOB");
test_parse_data_type("CLOB(50)", DataType::Clob(Some(50)), "CLOB(50)");

// Double data type
test_parse_data_type(
"DOUBLE PRECISION",
DataType::DoublePrecision,
"DOUBLE PRECISION",
);
test_parse_data_type("DOUBLE", DataType::Double, "DOUBLE");

// Time data type
test_parse_data_type("TIME", DataType::Time(TimezoneInfo::None), "TIME");
test_parse_data_type(
"TIME WITH TIME ZONE",
DataType::Time(TimezoneInfo::WithTimeZone),
"TIME WITH TIME ZONE",
);
test_parse_data_type(
"TIME WITHOUT TIME ZONE",
DataType::Time(TimezoneInfo::WithoutTimeZone),
"TIME WITHOUT TIME ZONE",
);
test_parse_data_type("TIMETZ", DataType::Time(TimezoneInfo::Tz), "TIMETZ");
#[cfg(test)]
mod test_parse_data_type {
use crate::ast::{DataType, TimezoneInfo};
use crate::dialect::{AnsiDialect, GenericDialect};
use crate::test_utils::TestedDialects;

// Timestamp data type
test_parse_data_type(
"TIMESTAMP",
DataType::Timestamp(TimezoneInfo::None),
"TIMESTAMP",
);
test_parse_data_type(
"TIMESTAMP WITH TIME ZONE",
DataType::Timestamp(TimezoneInfo::WithTimeZone),
"TIMESTAMP WITH TIME ZONE",
);
test_parse_data_type(
"TIMESTAMP WITHOUT TIME ZONE",
DataType::Timestamp(TimezoneInfo::WithoutTimeZone),
"TIMESTAMP WITHOUT TIME ZONE",
);
test_parse_data_type(
"TIMESTAMPTZ",
DataType::Timestamp(TimezoneInfo::Tz),
"TIMESTAMPTZ",
);
macro_rules! test_parse_data_type {
($dialect:expr, $input:expr, $expected_type:expr $(,)?) => {{
$dialect.run_parser_method(&*$input, |parser| {
let data_type = parser.parse_data_type().unwrap();
assert_eq!(data_type, $expected_type);
assert_eq!(data_type.to_string(), $input.to_string());
});
}};
}

// VARBINARY data type
test_parse_data_type("VARBINARY", DataType::Varbinary(None), "VARBINARY");
test_parse_data_type(
"VARBINARY(20)",
DataType::Varbinary(Some(20)),
"VARBINARY(20)",
);
#[test]
fn test_ansii_character_string_types() {
// Character string types: <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-string-type>
let dialect = TestedDialects {
dialects: vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})],
};

fn test_parse_data_type(input: &str, expected_type: DataType, expected_str: &str) {
all_dialects().run_parser_method(input, |parser| {
let data_type = parser.parse_data_type().unwrap();
assert_eq!(data_type, expected_type);
assert_eq!(expected_type.to_string(), expected_str.to_string());
});
test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None));

test_parse_data_type!(dialect, "CHARACTER(20)", DataType::Character(Some(20)));

test_parse_data_type!(dialect, "CHAR", DataType::Char(None));

test_parse_data_type!(dialect, "CHAR(20)", DataType::Char(Some(20)));

test_parse_data_type!(
dialect,
"CHARACTER VARYING(20)",
DataType::CharacterVarying(Some(20))
);

test_parse_data_type!(dialect, "CHAR VARYING(20)", DataType::CharVarying(Some(20)));

test_parse_data_type!(dialect, "VARCHAR(20)", DataType::Varchar(Some(20)));
}

#[test]
fn test_ansii_datetime_types() {
// Datetime types: <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type>
let dialect = TestedDialects {
dialects: vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})],
};

test_parse_data_type!(dialect, "DATE", DataType::Date);

test_parse_data_type!(dialect, "TIME", DataType::Time(TimezoneInfo::None));

test_parse_data_type!(
dialect,
"TIME WITH TIME ZONE",
DataType::Time(TimezoneInfo::WithTimeZone)
);

test_parse_data_type!(
dialect,
"TIME WITHOUT TIME ZONE",
DataType::Time(TimezoneInfo::WithoutTimeZone)
);

test_parse_data_type!(
dialect,
"TIMESTAMP",
DataType::Timestamp(TimezoneInfo::None)
);

test_parse_data_type!(
dialect,
"TIMESTAMP WITH TIME ZONE",
DataType::Timestamp(TimezoneInfo::WithTimeZone)
);

test_parse_data_type!(
dialect,
"TIMESTAMP WITHOUT TIME ZONE",
DataType::Timestamp(TimezoneInfo::WithoutTimeZone)
);
}
}

Expand Down
8 changes: 4 additions & 4 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1850,7 +1850,7 @@ fn parse_create_table() {
let ast = one_statement_parses_to(
sql,
"CREATE TABLE uk_cities (\
name CHARACTER VARYING(100) NOT NULL, \
name VARCHAR(100) NOT NULL, \
lat DOUBLE NULL, \
lng DOUBLE, \
constrained INT NULL CONSTRAINT pkey PRIMARY KEY NOT NULL UNIQUE CHECK (constrained > 0), \
Expand Down Expand Up @@ -2312,7 +2312,7 @@ fn parse_create_external_table() {
let ast = one_statement_parses_to(
sql,
"CREATE EXTERNAL TABLE uk_cities (\
name CHARACTER VARYING(100) NOT NULL, \
name VARCHAR(100) NOT NULL, \
lat DOUBLE NULL, \
lng DOUBLE) \
STORED AS TEXTFILE LOCATION '/tmp/example.csv'",
Expand Down Expand Up @@ -2382,7 +2382,7 @@ fn parse_create_or_replace_external_table() {
let ast = one_statement_parses_to(
sql,
"CREATE OR REPLACE EXTERNAL TABLE uk_cities (\
name CHARACTER VARYING(100) NOT NULL) \
name VARCHAR(100) NOT NULL) \
STORED AS TEXTFILE LOCATION '/tmp/example.csv'",
);
match ast {
Expand Down Expand Up @@ -2435,7 +2435,7 @@ fn parse_create_external_table_lowercase() {
let ast = one_statement_parses_to(
sql,
"CREATE EXTERNAL TABLE uk_cities (\
name CHARACTER VARYING(100) NOT NULL, \
name VARCHAR(100) NOT NULL, \
lat DOUBLE NULL, \
lng DOUBLE) \
STORED AS PARQUET LOCATION '/tmp/example.csv'",
Expand Down
6 changes: 3 additions & 3 deletions tests/sqlparser_postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ fn parse_create_table_with_defaults() {
},
ColumnDef {
name: "first_name".into(),
data_type: DataType::Varchar(Some(45)),
data_type: DataType::CharacterVarying(Some(45)),
collation: None,
options: vec![ColumnOptionDef {
name: None,
Expand All @@ -83,7 +83,7 @@ fn parse_create_table_with_defaults() {
},
ColumnDef {
name: "last_name".into(),
data_type: DataType::Varchar(Some(45)),
data_type: DataType::CharacterVarying(Some(45)),
collation: Some(ObjectName(vec![Ident::with_quote('"', "es_ES")])),
options: vec![ColumnOptionDef {
name: None,
Expand All @@ -92,7 +92,7 @@ fn parse_create_table_with_defaults() {
},
ColumnDef {
name: "email".into(),
data_type: DataType::Varchar(Some(50)),
data_type: DataType::CharacterVarying(Some(50)),
collation: None,
options: vec![],
},
Expand Down