Skip to content

Commit c968018

Browse files
committed
Avoid cloning tokens in parse_prefix
1 parent 00abaf2 commit c968018

File tree

2 files changed

+72
-33
lines changed

2 files changed

+72
-33
lines changed

src/dialect/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,18 @@ macro_rules! dialect_of {
7575
};
7676
}
7777

78+
// Like `dialect_of!` above, but without the `.dialect` member lookup, so it
// can be applied directly to a dialect value (e.g. a locally cached
// `&dyn Dialect`) instead of to the parser.
//
// NOTE(review): the dialect-check macros are discouraged elsewhere in this
// module in favor of trait methods; this variant exists only to avoid
// borrow/lifetime conflicts while token handling moves toward zero-copy
// semantics, without rewriting the existing dialect-dispatch logic.
macro_rules! dialect_is {
    ($dialect:ident is $($dialect_type:ty)|+) => {
        ($($dialect.is::<$dialect_type>())||+)
    }
}
89+
7890
/// Encapsulates the differences between SQL implementations.
7991
///
8092
/// # SQL Dialects

src/parser/mod.rs

Lines changed: 60 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,15 @@ impl std::error::Error for ParserError {}
186186
// By default, allow expressions up to this deep before erroring
187187
const DEFAULT_REMAINING_DEPTH: usize = 50;
188188

189+
// A constant `TokenWithSpan` holding `Token::EOF` that can be handed out by
// reference (see `next_token_ref` / `peek_nth_token_ref`) once the token
// stream is exhausted, avoiding the need to allocate or clone an EOF token.
// EOF has no real source position, so both ends of the span are the zero
// location.
const EOF_TOKEN: TokenWithSpan = TokenWithSpan {
    token: Token::EOF,
    span: Span {
        start: Location { line: 0, column: 0 },
        end: Location { line: 0, column: 0 },
    },
};
197+
189198
/// Composite types declarations using angle brackets syntax can be arbitrary
190199
/// nested such that the following declaration is possible:
191200
/// `ARRAY<ARRAY<INT>>`
@@ -1236,7 +1245,7 @@ impl<'a> Parser<'a> {
12361245
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
12371246
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
12381247
// expression that should parse as the column name "date".
1239-
let loc = self.peek_token().span.start;
1248+
let loc = self.peek_token_ref().span.start;
12401249
let opt_expr = self.maybe_parse(|parser| {
12411250
match parser.parse_data_type()? {
12421251
DataType::Interval => parser.parse_interval(),
@@ -1259,8 +1268,14 @@ impl<'a> Parser<'a> {
12591268
return Ok(expr);
12601269
}
12611270

1262-
let next_token = self.next_token();
1263-
let expr = match next_token.token {
1271+
// Cache some dialect properties to avoid lifetime issues with the
1272+
// next_token reference.
1273+
1274+
let dialect = self.dialect;
1275+
1276+
let next_token = self.next_token_ref();
1277+
let span = next_token.span;
1278+
let expr = match &next_token.token {
12641279
Token::Word(w) => {
12651280
// The word we consumed may fall into one of two cases: it has a special meaning, or not.
12661281
// For example, in Snowflake, the word `interval` may have two meanings depending on the context:
@@ -1270,14 +1285,13 @@ impl<'a> Parser<'a> {
12701285
//
12711286
// We first try to parse the word and following tokens as a special expression, and if that fails,
12721287
// we rollback and try to parse it as an identifier.
1273-
match self.try_parse(|parser| {
1274-
parser.parse_expr_prefix_by_reserved_word(&w, next_token.span)
1275-
}) {
1288+
let w = w.clone();
1289+
match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) {
12761290
// This word indicated an expression prefix and parsing was successful
12771291
Ok(Some(expr)) => Ok(expr),
12781292

12791293
// No expression prefix associated with this word
1280-
Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, next_token.span)?),
1294+
Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, span)?),
12811295

12821296
// If parsing of the word as a special expression failed, we are facing two options:
12831297
// 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`)
@@ -1288,7 +1302,7 @@ impl<'a> Parser<'a> {
12881302
Err(e) => {
12891303
if !self.dialect.is_reserved_for_identifier(w.keyword) {
12901304
if let Ok(Some(expr)) = self.maybe_parse(|parser| {
1291-
parser.parse_expr_prefix_by_unreserved_word(&w, next_token.span)
1305+
parser.parse_expr_prefix_by_unreserved_word(&w, span)
12921306
}) {
12931307
return Ok(expr);
12941308
}
@@ -1300,7 +1314,7 @@ impl<'a> Parser<'a> {
13001314
// array `[1, 2, 3]`
13011315
Token::LBracket => self.parse_array_expr(false),
13021316
tok @ Token::Minus | tok @ Token::Plus => {
1303-
let op = if tok == Token::Plus {
1317+
let op = if *tok == Token::Plus {
13041318
UnaryOperator::Plus
13051319
} else {
13061320
UnaryOperator::Minus
@@ -1312,20 +1326,16 @@ impl<'a> Parser<'a> {
13121326
),
13131327
})
13141328
}
1315-
Token::ExclamationMark if self.dialect.supports_bang_not_operator() => {
1316-
Ok(Expr::UnaryOp {
1317-
op: UnaryOperator::BangNot,
1318-
expr: Box::new(
1319-
self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?,
1320-
),
1321-
})
1322-
}
1329+
Token::ExclamationMark if dialect.supports_bang_not_operator() => Ok(Expr::UnaryOp {
1330+
op: UnaryOperator::BangNot,
1331+
expr: Box::new(self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?),
1332+
}),
13231333
tok @ Token::DoubleExclamationMark
13241334
| tok @ Token::PGSquareRoot
13251335
| tok @ Token::PGCubeRoot
13261336
| tok @ Token::AtSign
13271337
| tok @ Token::Tilde
1328-
if dialect_of!(self is PostgreSqlDialect) =>
1338+
if dialect_is!(dialect is PostgreSqlDialect) =>
13291339
{
13301340
let op = match tok {
13311341
Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial,
@@ -1342,7 +1352,7 @@ impl<'a> Parser<'a> {
13421352
),
13431353
})
13441354
}
1345-
Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
1355+
Token::EscapedStringLiteral(_) if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) =>
13461356
{
13471357
self.prev_token();
13481358
Ok(Expr::Value(self.parse_value()?))
@@ -1408,11 +1418,11 @@ impl<'a> Parser<'a> {
14081418
self.prev_token();
14091419
Ok(Expr::Value(self.parse_value()?))
14101420
}
1411-
Token::LBrace if self.dialect.supports_dictionary_syntax() => {
1421+
Token::LBrace if dialect.supports_dictionary_syntax() => {
14121422
self.prev_token();
14131423
self.parse_duckdb_struct_literal()
14141424
}
1415-
_ => self.expected("an expression", next_token),
1425+
_ => self.expected_current("an expression"),
14161426
}?;
14171427

14181428
let expr = self.try_parse_method(expr)?;
@@ -3273,11 +3283,17 @@ impl<'a> Parser<'a> {
32733283
}
32743284

32753285
/// Return the first non-whitespace token that has not yet been processed
3276-
/// (or None if reached end-of-file)
3286+
/// or Token::EOF
32773287
pub fn peek_token(&self) -> TokenWithSpan {
32783288
self.peek_nth_token(0)
32793289
}
32803290

3291+
/// Return a reference to the first non-whitespace token that has not yet
3292+
/// been processed or Token::EOF
3293+
pub fn peek_token_ref(&self) -> &TokenWithSpan {
3294+
self.peek_nth_token_ref(0)
3295+
}
3296+
32813297
/// Returns the `N` next non-whitespace tokens that have not yet been
32823298
/// processed.
32833299
///
@@ -3329,7 +3345,12 @@ impl<'a> Parser<'a> {
33293345
}
33303346

33313347
/// Return nth non-whitespace token that has not yet been processed
3332-
pub fn peek_nth_token(&self, mut n: usize) -> TokenWithSpan {
3348+
pub fn peek_nth_token(&self, n: usize) -> TokenWithSpan {
3349+
self.peek_nth_token_ref(n).clone()
3350+
}
3351+
3352+
/// Return nth non-whitespace token that has not yet been processed
3353+
pub fn peek_nth_token_ref(&self, mut n: usize) -> &TokenWithSpan {
33333354
let mut index = self.index;
33343355
loop {
33353356
index += 1;
@@ -3340,10 +3361,7 @@ impl<'a> Parser<'a> {
33403361
}) => continue,
33413362
non_whitespace => {
33423363
if n == 0 {
3343-
return non_whitespace.cloned().unwrap_or(TokenWithSpan {
3344-
token: Token::EOF,
3345-
span: Span::empty(),
3346-
});
3364+
return non_whitespace.unwrap_or(&EOF_TOKEN);
33473365
}
33483366
n -= 1;
33493367
}
@@ -3376,22 +3394,22 @@ impl<'a> Parser<'a> {
33763394
matched
33773395
}
33783396

3397+
/// Return a clone of the first non-whitespace token that has not yet been
/// processed (or `Token::EOF` once the end of input is reached) and mark it
/// as processed. OK to call repeatedly after reaching EOF.
pub fn next_token(&mut self) -> TokenWithSpan {
    self.next_token_ref().clone()
}
3400+
33793401
/// Return the first non-whitespace token that has not yet been processed
33803402
/// (or None if reached end-of-file) and mark it as processed. OK to call
33813403
/// repeatedly after reaching EOF.
3382-
pub fn next_token(&mut self) -> TokenWithSpan {
3404+
pub fn next_token_ref(&mut self) -> &TokenWithSpan {
33833405
loop {
33843406
self.index += 1;
33853407
match self.tokens.get(self.index - 1) {
33863408
Some(TokenWithSpan {
33873409
token: Token::Whitespace(_),
33883410
span: _,
33893411
}) => continue,
3390-
token => {
3391-
return token
3392-
.cloned()
3393-
.unwrap_or_else(|| TokenWithSpan::wrap(Token::EOF))
3394-
}
3412+
token => return token.unwrap_or(&EOF_TOKEN),
33953413
}
33963414
}
33973415
}
@@ -3428,6 +3446,15 @@ impl<'a> Parser<'a> {
34283446
)
34293447
}
34303448

3449+
/// Report that the current token was found instead of `expected`.
3450+
pub fn expected_current<T>(&self, expected: &str) -> Result<T, ParserError> {
3451+
let found = self.tokens.get(self.index).unwrap_or(&EOF_TOKEN);
3452+
parser_err!(
3453+
format!("Expected: {expected}, found: {found}"),
3454+
found.span.start
3455+
)
3456+
}
3457+
34313458
/// If the current token is the `expected` keyword, consume it and returns
34323459
/// true. Otherwise, no tokens are consumed and returns false.
34333460
#[must_use]

0 commit comments

Comments
 (0)