From 472f0d480650bb1c3c95596fcb910895b8d25ab5 Mon Sep 17 00:00:00 2001 From: David Szotten Date: Thu, 20 Jul 2023 20:45:41 +0100 Subject: [PATCH 1/7] fix the ranges of constants inside f-strings --- ...parser__parser__tests__parse_f_string.snap | 2 +- ...rustpython_parser__parser__tests__try.snap | 4 +- ...ython_parser__parser__tests__try_star.snap | 8 +- ...string__tests__fstring_constant_range.snap | 73 +++++++++++++++++++ ...ing__tests__fstring_escaped_character.snap | 2 +- ...tring__tests__fstring_escaped_newline.snap | 2 +- ...ing__tests__fstring_line_continuation.snap | 2 +- ...ring_parse_self_documenting_base_more.snap | 4 +- ...ing__tests__fstring_unescaped_newline.snap | 2 +- ...tring__tests__parse_f_string_concat_1.snap | 2 +- ...tring__tests__parse_f_string_concat_2.snap | 2 +- ...tring__tests__parse_f_string_concat_3.snap | 2 +- ..._parser__string__tests__parse_fstring.snap | 2 +- ...ing__tests__parse_u_f_string_concat_1.snap | 2 +- parser/src/string.rs | 39 ++++++++-- 15 files changed, 122 insertions(+), 26 deletions(-) create mode 100644 parser/src/snapshots/rustpython_parser__string__tests__fstring_constant_range.snap diff --git a/parser/src/snapshots/rustpython_parser__parser__tests__parse_f_string.snap b/parser/src/snapshots/rustpython_parser__parser__tests__parse_f_string.snap index ee547558..0d72dcdd 100644 --- a/parser/src/snapshots/rustpython_parser__parser__tests__parse_f_string.snap +++ b/parser/src/snapshots/rustpython_parser__parser__tests__parse_f_string.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..14, + range: 2..13, value: Str( "Hello world", ), diff --git a/parser/src/snapshots/rustpython_parser__parser__tests__try.snap b/parser/src/snapshots/rustpython_parser__parser__tests__try.snap index d76069a1..82f88058 100644 --- a/parser/src/snapshots/rustpython_parser__parser__tests__try.snap +++ b/parser/src/snapshots/rustpython_parser__parser__tests__try.snap @@ -80,7 +80,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 62..81, + range: 64..71, value: Str( "caught ", ), @@ -167,7 +167,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 114..133, + range: 116..123, value: Str( "caught ", ), diff --git a/parser/src/snapshots/rustpython_parser__parser__tests__try_star.snap b/parser/src/snapshots/rustpython_parser__parser__tests__try_star.snap index 841d9823..c9df3e38 100644 --- a/parser/src/snapshots/rustpython_parser__parser__tests__try_star.snap +++ b/parser/src/snapshots/rustpython_parser__parser__tests__try_star.snap @@ -184,7 +184,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 133..179, + range: 135..142, value: Str( "caught ", ), @@ -222,7 +222,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 133..179, + range: 151..164, value: Str( " with nested ", ), @@ -304,7 +304,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 213..259, + range: 215..222, value: Str( "caught ", ), @@ -342,7 +342,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 213..259, + range: 231..244, value: Str( " with nested ", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_constant_range.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_constant_range.snap new file mode 100644 index 00000000..741a3a05 --- /dev/null +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_constant_range.snap @@ -0,0 +1,73 @@ +--- +source: parser/src/string.rs +expression: parse_ast +--- +[ + Expr( + StmtExpr { + range: 0..22, + value: JoinedStr( + ExprJoinedStr { + range: 0..22, + values: [ + Constant( + ExprConstant { + range: 2..5, + value: Str( + "aaa", + ), + kind: None, + }, + ), + FormattedValue( + ExprFormattedValue { + range: 0..22, + value: Name( + ExprName { + range: 6..9, + id: "bbb", + ctx: Load, + }, + ), + conversion: None, + format_spec: None, + }, + ), + Constant( + ExprConstant { + range: 10..13, + value: Str( + "ccc", + ), + kind: None, + }, + ), + FormattedValue( + ExprFormattedValue { + range: 0..22, + value: Name( + ExprName { + range: 14..17, + id: "ddd", + ctx: Load, + }, + ), + conversion: None, + format_spec: None, + }, + ), + Constant( + ExprConstant { + range: 18..21, + value: Str( + "eee", + ), + kind: None, + }, + ), + ], + }, + ), + }, + ), +] diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_character.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_character.snap index a7e92554..d75701f2 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_character.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_character.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..8, + range: 2..4, value: Str( "\\", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_newline.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_newline.snap index 276bff7b..1120a015 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_newline.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_newline.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..8, + range: 2..4, value: Str( "\n", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_line_continuation.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_line_continuation.snap index 6b536a08..1b45ccc3 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_line_continuation.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_line_continuation.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..9, + range: 3..5, value: Str( "\\\n", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base_more.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base_more.snap index 8ce8de79..fe8e10f3 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base_more.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base_more.snap @@ -5,7 +5,7 @@ expression: parse_ast [ Constant( ExprConstant { - range: 0..38, + range: 2..6, value: Str( "mix ", ), @@ -46,7 +46,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 0..38, + range: 13..28, value: Str( " with text and ", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_unescaped_newline.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_unescaped_newline.snap index 18d0dd5e..b7e3a992 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_unescaped_newline.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_unescaped_newline.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..11, + range: 4..5, value: Str( "\n", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_1.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_1.snap index 274823e8..3794262b 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_1.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_1.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..17, + range: 0..16, value: Str( "Hello world", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_2.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_2.snap index 274823e8..3794262b 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_2.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_2.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..17, + range: 0..16, value: Str( "Hello world", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_3.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_3.snap index 1cefb19d..b7bbf61d 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_3.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_3.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..22, + range: 0..16, value: Str( "Hello world", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring.snap index aeb71829..9f33bb25 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring.snap @@ -33,7 +33,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 0..18, + range: 10..17, value: Str( "{foo}", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_1.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_1.snap index 68aacbe0..37667668 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_1.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_1.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..18, + range: 0..17, value: Str( "Hello world", ), diff --git a/parser/src/string.rs b/parser/src/string.rs index cb2b0e19..a9431e0b 100644 --- a/parser/src/string.rs +++ b/parser/src/string.rs @@ -11,6 +11,7 @@ use crate::{ token::{StringKind, Tok}, }; use itertools::Itertools; +use rustpython_ast::Ranged; use rustpython_parser_core::{ text_size::{TextLen, TextSize}, ConversionFlag, @@ -452,6 +453,7 @@ impl<'a> StringParser<'a> { } let mut content = String::new(); + let mut content_start = self.location; let mut values = vec![]; while let Some(&ch) = self.peek() { @@ -477,7 +479,10 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: content.drain(..).collect::().into(), kind: None, - range: self.range(), + range: TextRange::new( + std::mem::replace(&mut content_start, self.location), + self.location - TextSize::from(1), + ), } .into(), ), @@ -486,6 +491,7 @@ impl<'a> StringParser<'a> { let parsed_values = self.parse_formatted_value(nested)?; values.extend(parsed_values); + content_start = self.location; } '}' => { if nested > 0 { @@ -516,7 +522,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: content.into(), kind: None, - range: self.range(), + range: TextRange::new(content_start, self.location), } .into(), ), @@ -674,34 +680,43 @@ pub(crate) fn parse_strings( // De-duplicate adjacent constants. let mut deduped: Vec = vec![]; let mut current: Vec = vec![]; + let mut current_start = initial_start; + let mut current_end = last_end; - let take_current = |current: &mut Vec| -> Expr { + let take_current = |current: &mut Vec, start, end| -> Expr { Expr::Constant(ast::ExprConstant { value: Constant::Str(current.drain(..).join("")), kind: initial_kind.clone(), - range: TextRange::new(initial_start, last_end), + range: TextRange::new(start, end), }) }; for (start, (source, kind, triple_quoted), end) in values { for value in parse_string(&source, kind, triple_quoted, start, end)? { + let value_range = value.range(); match value { Expr::FormattedValue { .. } => { if !current.is_empty() { - deduped.push(take_current(&mut current)); + deduped.push(take_current(&mut current, current_start, current_end)); } deduped.push(value) } Expr::Constant(ast::ExprConstant { - value: Constant::Str(value), + value: Constant::Str(inner), .. - }) => current.push(value), + }) => { + if current.is_empty() { + current_start = value_range.start(); + } + current_end = value_range.end(); + current.push(inner); + } _ => unreachable!("Unexpected non-string expression."), } } } if !current.is_empty() { - deduped.push(take_current(&mut current)); + deduped.push(take_current(&mut current, current_start, current_end)); } Ok(Expr::JoinedStr(ast::ExprJoinedStr { @@ -1069,6 +1084,14 @@ mod tests { insta::assert_debug_snapshot!(parse_ast); } + #[test] + fn test_fstring_constant_range() { + let source = r#"f"aaa{bbb}ccc{ddd}eee""#; + let parse_ast = ast::Suite::parse(source, "").unwrap(); + // assert!(false); + insta::assert_debug_snapshot!(parse_ast); + } + #[test] fn test_fstring_unescaped_newline() { let source = r#"f""" From 7b97cd35df37fa5a39f79a5ab882836207b5b1ad Mon Sep 17 00:00:00 2001 From: David Szotten Date: Sat, 22 Jul 2023 18:01:03 +0100 Subject: [PATCH 2/7] check all ranges self-documenting f-strings (`f"{foo=}"`) are still outstanding see https://github.com/astral-sh/ruff/issues/5970 --- ...rustpython_parser__parser__tests__try.snap | 4 +- ...ython_parser__parser__tests__try_star.snap | 8 +- ...string__tests__fstring_constant_range.snap | 4 +- ...ing__tests__fstring_escaped_character.snap | 2 +- ...tring__tests__fstring_escaped_newline.snap | 2 +- ...ing__tests__fstring_line_continuation.snap | 2 +- ...__fstring_parse_self_documenting_base.snap | 6 +- ...ring_parse_self_documenting_base_more.snap | 12 +-- ...fstring_parse_self_documenting_format.snap | 10 +-- ...ing__tests__fstring_unescaped_newline.snap | 2 +- ...tring__tests__parse_f_string_concat_1.snap | 2 +- ...tring__tests__parse_f_string_concat_2.snap | 2 +- ...tring__tests__parse_f_string_concat_3.snap | 4 +- ..._parser__string__tests__parse_fstring.snap | 4 +- ...__string__tests__parse_fstring_equals.snap | 2 +- ...ing__tests__parse_fstring_nested_spec.snap | 6 +- ...ring__tests__parse_fstring_not_equals.snap | 2 +- ..._tests__parse_fstring_not_nested_spec.snap | 6 +- ...ts__parse_fstring_self_doc_prec_space.snap | 6 +- ...parse_fstring_self_doc_trailing_space.snap | 6 +- ...ring__tests__parse_fstring_yield_expr.snap | 2 +- ...ing__tests__parse_u_f_string_concat_1.snap | 2 +- ...ing__tests__parse_u_f_string_concat_2.snap | 2 +- ...on_parser__string__tests__raw_fstring.snap | 2 +- ...ing__tests__triple_quoted_raw_fstring.snap | 2 +- parser/src/string.rs | 90 ++++++++----------- 26 files changed, 89 insertions(+), 103 deletions(-) diff --git a/parser/src/snapshots/rustpython_parser__parser__tests__try.snap b/parser/src/snapshots/rustpython_parser__parser__tests__try.snap index 82f88058..8b0b71d0 100644 --- a/parser/src/snapshots/rustpython_parser__parser__tests__try.snap +++ b/parser/src/snapshots/rustpython_parser__parser__tests__try.snap @@ -89,7 +89,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 62..81, + range: 71..80, value: Call( ExprCall { range: 72..79, @@ -176,7 +176,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 114..133, + range: 123..132, value: Call( ExprCall { range: 124..131, diff --git a/parser/src/snapshots/rustpython_parser__parser__tests__try_star.snap b/parser/src/snapshots/rustpython_parser__parser__tests__try_star.snap index c9df3e38..4bbeafff 100644 --- a/parser/src/snapshots/rustpython_parser__parser__tests__try_star.snap +++ b/parser/src/snapshots/rustpython_parser__parser__tests__try_star.snap @@ -193,7 +193,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 133..179, + range: 142..151, value: Call( ExprCall { range: 143..150, @@ -231,7 +231,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 133..179, + range: 164..178, value: Attribute( ExprAttribute { range: 165..177, @@ -313,7 +313,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 213..259, + range: 222..231, value: Call( ExprCall { range: 223..230, @@ -351,7 +351,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 213..259, + range: 244..258, value: Attribute( ExprAttribute { range: 245..257, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_constant_range.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_constant_range.snap index 741a3a05..fd948684 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_constant_range.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_constant_range.snap @@ -21,7 +21,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..22, + range: 5..10, value: Name( ExprName { range: 6..9, @@ -44,7 +44,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..22, + range: 13..18, value: Name( ExprName { range: 14..17, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_character.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_character.snap index d75701f2..18a0f192 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_character.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_character.snap @@ -21,7 +21,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..8, + range: 4..7, value: Name( ExprName { range: 5..6, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_newline.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_newline.snap index 1120a015..472507b6 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_newline.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_escaped_newline.snap @@ -21,7 +21,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..8, + range: 4..7, value: Name( ExprName { range: 5..6, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_line_continuation.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_line_continuation.snap index 1b45ccc3..f9286876 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_line_continuation.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_line_continuation.snap @@ -21,7 +21,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..9, + range: 5..8, value: Name( ExprName { range: 6..7, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base.snap index 33eed095..9fdcc7be 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base.snap @@ -5,7 +5,7 @@ expression: parse_ast [ Constant( ExprConstant { - range: 0..10, + range: 2..9, value: Str( "user=", ), @@ -14,7 +14,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 0..10, + range: 2..9, value: Str( "", ), @@ -23,7 +23,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..10, + range: 2..9, value: Name( ExprName { range: 3..7, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base_more.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base_more.snap index fe8e10f3..91cf8126 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base_more.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_base_more.snap @@ -14,7 +14,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 0..38, + range: 6..13, value: Str( "user=", ), @@ -23,7 +23,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 0..38, + range: 6..13, value: Str( "", ), @@ -32,7 +32,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..38, + range: 6..13, value: Name( ExprName { range: 7..11, @@ -55,7 +55,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 0..38, + range: 28..37, value: Str( "second=", ), @@ -64,7 +64,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 0..38, + range: 28..37, value: Str( "", ), @@ -73,7 +73,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..38, + range: 28..37, value: Name( ExprName { range: 29..35, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_format.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_format.snap index 9e4391e4..18d0d2df 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_format.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_parse_self_documenting_format.snap @@ -5,7 +5,7 @@ expression: parse_ast [ Constant( ExprConstant { - range: 0..14, + range: 2..13, value: Str( "user=", ), @@ -14,7 +14,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 0..14, + range: 2..13, value: Str( "", ), @@ -23,7 +23,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..14, + range: 2..13, value: Name( ExprName { range: 3..7, @@ -35,11 +35,11 @@ expression: parse_ast format_spec: Some( JoinedStr( ExprJoinedStr { - range: 0..14, + range: 9..12, values: [ Constant( ExprConstant { - range: 0..14, + range: 9..12, value: Str( ">10", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__fstring_unescaped_newline.snap b/parser/src/snapshots/rustpython_parser__string__tests__fstring_unescaped_newline.snap index b7e3a992..0190a3e6 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__fstring_unescaped_newline.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__fstring_unescaped_newline.snap @@ -21,7 +21,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..11, + range: 5..8, value: Name( ExprName { range: 6..7, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_1.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_1.snap index 3794262b..0925a623 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_1.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_1.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..16, + range: 1..16, value: Str( "Hello world", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_2.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_2.snap index 3794262b..0925a623 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_2.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_2.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..16, + range: 1..16, value: Str( "Hello world", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_3.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_3.snap index b7bbf61d..ab7f507b 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_3.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_3.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..16, + range: 1..16, value: Str( "Hello world", ), @@ -21,7 +21,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 9..22, + range: 16..21, value: Constant( ExprConstant { range: 17..20, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring.snap index 9f33bb25..2af0e247 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring.snap @@ -5,7 +5,7 @@ expression: parse_ast [ FormattedValue( ExprFormattedValue { - range: 0..18, + range: 2..5, value: Name( ExprName { range: 3..4, @@ -19,7 +19,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..18, + range: 5..10, value: Name( ExprName { range: 7..8, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_equals.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_equals.snap index b0007452..e1834fe8 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_equals.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_equals.snap @@ -5,7 +5,7 @@ expression: parse_ast [ FormattedValue( ExprFormattedValue { - range: 0..13, + range: 2..12, value: Compare( ExprCompare { range: 3..11, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_nested_spec.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_nested_spec.snap index 61462e3a..fdfffcad 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_nested_spec.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_nested_spec.snap @@ -5,7 +5,7 @@ expression: parse_ast [ FormattedValue( ExprFormattedValue { - range: 0..15, + range: 2..14, value: Name( ExprName { range: 3..6, @@ -17,11 +17,11 @@ expression: parse_ast format_spec: Some( JoinedStr( ExprJoinedStr { - range: 0..15, + range: 7..13, values: [ FormattedValue( ExprFormattedValue { - range: 0..15, + range: 7..13, value: Name( ExprName { range: 8..12, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_not_equals.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_not_equals.snap index c0cf8ebe..432f87d4 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_not_equals.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_not_equals.snap @@ -5,7 +5,7 @@ expression: parse_ast [ FormattedValue( ExprFormattedValue { - range: 0..11, + range: 2..10, value: Compare( ExprCompare { range: 3..9, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_not_nested_spec.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_not_nested_spec.snap index dfc93454..bbecd705 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_not_nested_spec.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_not_nested_spec.snap @@ -5,7 +5,7 @@ expression: parse_ast [ FormattedValue( ExprFormattedValue { - range: 0..13, + range: 2..12, value: Name( ExprName { range: 3..6, @@ -17,11 +17,11 @@ expression: parse_ast format_spec: Some( JoinedStr( ExprJoinedStr { - range: 0..13, + range: 7..11, values: [ Constant( ExprConstant { - range: 0..13, + range: 7..11, value: Str( "spec", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_self_doc_prec_space.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_self_doc_prec_space.snap index f02d4cef..5a290d1e 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_self_doc_prec_space.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_self_doc_prec_space.snap @@ -5,7 +5,7 @@ expression: parse_ast [ Constant( ExprConstant { - range: 0..10, + range: 2..9, value: Str( "x =", ), @@ -14,7 +14,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 0..10, + range: 2..9, value: Str( "", ), @@ -23,7 +23,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..10, + range: 2..9, value: Name( ExprName { range: 3..4, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_self_doc_trailing_space.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_self_doc_trailing_space.snap index c74ea03b..53d66931 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_self_doc_trailing_space.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_self_doc_trailing_space.snap @@ -5,7 +5,7 @@ expression: parse_ast [ Constant( ExprConstant { - range: 0..10, + range: 2..9, value: Str( "x=", ), @@ -14,7 +14,7 @@ expression: parse_ast ), Constant( ExprConstant { - range: 0..10, + range: 2..9, value: Str( " ", ), @@ -23,7 +23,7 @@ expression: parse_ast ), FormattedValue( ExprFormattedValue { - range: 0..10, + range: 2..9, value: Name( ExprName { range: 3..4, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_yield_expr.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_yield_expr.snap index e2bd4ea3..0b9ed9ca 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_yield_expr.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_fstring_yield_expr.snap @@ -5,7 +5,7 @@ expression: parse_ast [ FormattedValue( ExprFormattedValue { - range: 0..10, + range: 2..9, value: Yield( ExprYield { range: 3..8, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_1.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_1.snap index 37667668..b8a3ab87 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_1.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_1.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..17, + range: 2..17, value: Str( "Hello world", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_2.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_2.snap index c77d5090..8bed0e1e 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_2.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_2.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ Constant( ExprConstant { - range: 0..22, + range: 2..21, value: Str( "Hello world!", ), diff --git a/parser/src/snapshots/rustpython_parser__string__tests__raw_fstring.snap b/parser/src/snapshots/rustpython_parser__string__tests__raw_fstring.snap index d6f10352..9b52d7c7 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__raw_fstring.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__raw_fstring.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ FormattedValue( ExprFormattedValue { - range: 0..7, + range: 3..6, value: Name( ExprName { range: 4..5, diff --git a/parser/src/snapshots/rustpython_parser__string__tests__triple_quoted_raw_fstring.snap b/parser/src/snapshots/rustpython_parser__string__tests__triple_quoted_raw_fstring.snap index 759ece38..9e2db104 100644 --- a/parser/src/snapshots/rustpython_parser__string__tests__triple_quoted_raw_fstring.snap +++ b/parser/src/snapshots/rustpython_parser__string__tests__triple_quoted_raw_fstring.snap @@ -12,7 +12,7 @@ expression: parse_ast values: [ FormattedValue( ExprFormattedValue { - range: 0..11, + range: 5..8, value: Name( ExprName { range: 6..7, diff --git a/parser/src/string.rs b/parser/src/string.rs index a9431e0b..dc08ec2d 100644 --- a/parser/src/string.rs +++ b/parser/src/string.rs @@ -23,19 +23,11 @@ const MAX_UNICODE_NAME: usize = 88; struct StringParser<'a> { chars: std::iter::Peekable>, kind: StringKind, - start: TextSize, - end: TextSize, location: TextSize, } impl<'a> StringParser<'a> { - fn new( - source: &'a str, - kind: StringKind, - triple_quoted: bool, - start: TextSize, - end: TextSize, - ) -> Self { + fn new(source: &'a str, kind: StringKind, triple_quoted: bool, start: TextSize) -> Self { let offset = kind.prefix_len() + if triple_quoted { TextSize::from(3) @@ -45,8 +37,6 @@ impl<'a> StringParser<'a> { Self { chars: source.chars().peekable(), kind, - start, - end, location: start + offset, } } @@ -69,12 +59,13 @@ impl<'a> StringParser<'a> { } #[inline] - fn expr(&self, node: Expr) -> Expr { - node + fn current_range(&self, current_location: TextSize) -> TextRange { + TextRange::new(current_location, self.location) } - fn range(&self) -> TextRange { - TextRange::new(self.start, self.end) + #[inline] + fn expr(&self, node: Expr) -> Expr { + node } fn parse_unicode_literal(&mut self, literal_number: usize) -> Result { @@ -192,7 +183,7 @@ impl<'a> StringParser<'a> { let mut conversion = ConversionFlag::None; let mut self_documenting = false; let mut trailing_seq = String::new(); - let location = self.get_pos(); + let location = self.get_pos() - TextSize::from(1); while let Some(ch) = self.next_char() { match ch { @@ -237,13 +228,14 @@ impl<'a> StringParser<'a> { } ':' if delimiters.is_empty() => { + let location = self.get_pos(); let parsed_spec = self.parse_spec(nested)?; spec = Some(Box::new( self.expr( ast::ExprJoinedStr { values: parsed_spec, - range: self.range(), + range: self.current_range(location), } .into(), ), @@ -323,17 +315,19 @@ impl<'a> StringParser<'a> { ), conversion, format_spec: spec, - range: self.range(), + range: self.current_range(location), } .into(), )] } else { + // TODO: range is wrong but `self_documenting` needs revisiting beyond + // ranges vec![ self.expr( ast::ExprConstant { value: Constant::Str(expression.to_owned() + "="), kind: None, - range: self.range(), + range: self.current_range(location), } .into(), ), @@ -341,7 +335,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: trailing_seq.into(), kind: None, - range: self.range(), + range: self.current_range(location), } .into(), ), @@ -363,7 +357,7 @@ impl<'a> StringParser<'a> { conversion }, format_spec: spec, - range: self.range(), + range: self.current_range(location), } .into(), ), @@ -402,6 +396,7 @@ impl<'a> StringParser<'a> { fn parse_spec(&mut self, nested: u8) -> Result, LexicalError> { let mut spec_constructor = Vec::new(); let mut constant_piece = String::new(); + let mut location = self.get_pos(); while let Some(&next) = self.peek() { match next { '{' => { @@ -411,7 +406,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: constant_piece.drain(..).collect::().into(), kind: None, - range: self.range(), + range: self.current_range(location), } .into(), ), @@ -419,6 +414,7 @@ impl<'a> StringParser<'a> { } let parsed_expr = self.parse_fstring(nested + 1)?; spec_constructor.extend(parsed_expr); + location = self.get_pos(); continue; } '}' => { @@ -436,7 +432,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: constant_piece.drain(..).collect::().into(), kind: None, - range: self.range(), + range: self.current_range(location), } .into(), ), @@ -453,7 +449,7 @@ impl<'a> StringParser<'a> { } let mut content = String::new(); - let mut content_start = self.location; + let mut location = self.get_pos(); let mut values = vec![]; while let Some(&ch) = self.peek() { @@ -474,15 +470,13 @@ impl<'a> StringParser<'a> { } } if !content.is_empty() { + let range = self.current_range(location).sub_end(1.into()); values.push( self.expr( ast::ExprConstant { value: content.drain(..).collect::().into(), kind: None, - range: TextRange::new( - std::mem::replace(&mut content_start, self.location), - self.location - TextSize::from(1), - ), + range, } .into(), ), @@ -491,7 +485,7 @@ impl<'a> StringParser<'a> { let parsed_values = self.parse_formatted_value(nested)?; values.extend(parsed_values); - content_start = self.location; + location = self.get_pos(); } '}' => { if nested > 0 { @@ -522,7 +516,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: content.into(), kind: None, - range: TextRange::new(content_start, self.location), + range: self.current_range(location), } .into(), ), @@ -534,6 +528,7 @@ impl<'a> StringParser<'a> { fn parse_bytes(&mut self) -> Result { let mut content = String::new(); + let location = self.get_pos(); while let Some(ch) = self.next_char() { match ch { '\\' if !self.kind.is_raw() => { @@ -557,7 +552,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: Constant::Bytes(content.chars().map(|c| c as u8).collect()), kind: None, - range: self.range(), + range: self.current_range(location), } .into(), )) @@ -565,6 +560,7 @@ impl<'a> StringParser<'a> { fn parse_string(&mut self) -> Result { let mut content = String::new(); + let location = self.get_pos(); while let Some(ch) = self.next_char() { match ch { '\\' if !self.kind.is_raw() => { @@ -577,7 +573,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: Constant::Str(content), kind: self.kind.is_unicode().then(|| "u".to_string()), - range: self.range(), + range: self.current_range(location), } .into(), )) @@ -589,15 +585,14 @@ impl<'a> StringParser<'a> { } else if self.kind.is_any_bytes() { self.parse_bytes().map(|expr| vec![expr]) } else { - self.parse_string().map(|expr| vec![expr]) + dbg!(self.parse_string().map(|expr| vec![expr])) } } } fn parse_fstring_expr(source: &str, location: TextSize) -> Result { let fstring_body = format!("({source})"); - let start = location - TextSize::from(1); - ast::Expr::parse_starts_at(&fstring_body, "", start) + ast::Expr::parse_starts_at(&fstring_body, "", location) } fn parse_string( @@ -605,9 +600,8 @@ fn parse_string( kind: StringKind, triple_quoted: bool, start: TextSize, - end: TextSize, ) -> Result, LexicalError> { - StringParser::new(source, kind, triple_quoted, start, end).parse() + StringParser::new(source, kind, triple_quoted, start).parse() } pub(crate) fn parse_strings( @@ -637,8 +631,8 @@ pub(crate) fn parse_strings( if has_bytes { let mut content: Vec = vec![]; - for (start, (source, kind, triple_quoted), end) in values { - for value in parse_string(&source, kind, triple_quoted, start, end)? { + for (start, (source, kind, triple_quoted), _) in values { + for value in parse_string(&source, kind, triple_quoted, start)? { match value { Expr::Constant(ast::ExprConstant { value: Constant::Bytes(value), @@ -658,8 +652,8 @@ pub(crate) fn parse_strings( if !has_fstring { let mut content: Vec = vec![]; - for (start, (source, kind, triple_quoted), end) in values { - for value in parse_string(&source, kind, triple_quoted, start, end)? { + for (start, (source, kind, triple_quoted), _) in values { + for value in parse_string(&source, kind, triple_quoted, start)? { match value { Expr::Constant(ast::ExprConstant { value: Constant::Str(value), @@ -691,8 +685,8 @@ pub(crate) fn parse_strings( }) }; - for (start, (source, kind, triple_quoted), end) in values { - for value in parse_string(&source, kind, triple_quoted, start, end)? { + for (start, (source, kind, triple_quoted), _) in values { + for value in parse_string(&source, kind, triple_quoted, start)? { let value_range = value.range(); match value { Expr::FormattedValue { .. } => { @@ -833,14 +827,7 @@ mod tests { use crate::{ast, Parse}; fn parse_fstring(source: &str) -> Result, LexicalError> { - StringParser::new( - source, - StringKind::FString, - false, - TextSize::default(), - TextSize::default() + source.text_len() + TextSize::from(3), // 3 for prefix and quotes - ) - .parse() + StringParser::new(source, StringKind::FString, false, TextSize::default()).parse() } #[test] @@ -1088,7 +1075,6 @@ mod tests { fn test_fstring_constant_range() { let source = r#"f"aaa{bbb}ccc{ddd}eee""#; let parse_ast = ast::Suite::parse(source, "").unwrap(); - // assert!(false); insta::assert_debug_snapshot!(parse_ast); } From f73f1d49dec23eb0a6f43c3292ada9685c8c7fe1 Mon Sep 17 00:00:00 2001 From: David Szotten Date: Tue, 25 Jul 2023 14:13:02 +0100 Subject: [PATCH 3/7] remove stray dbg --- parser/src/string.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser/src/string.rs b/parser/src/string.rs index dc08ec2d..6cdc63d4 100644 --- a/parser/src/string.rs +++ b/parser/src/string.rs @@ -585,7 +585,7 @@ impl<'a> StringParser<'a> { } else if self.kind.is_any_bytes() { self.parse_bytes().map(|expr| vec![expr]) } else { - dbg!(self.parse_string().map(|expr| vec![expr])) + self.parse_string().map(|expr| vec![expr]) } } } From 13c68ba8f8be7f79624f1445bf72d5e73820ba99 Mon Sep 17 00:00:00 2001 From: David Szotten Date: Tue, 25 Jul 2023 15:27:35 +0100 Subject: [PATCH 4/7] use multipeek to avoid consuming single brackets use multipeek so we can check for `{{` without having to consume the first `{` and can instead leave that to the sub-parsers. this also means we can get the location for the previous (now ending) range before advancing the cursor --- parser/Cargo.toml | 1 + parser/src/string.rs | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/parser/Cargo.toml b/parser/Cargo.toml index b6c20ff8..ed094744 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -33,6 +33,7 @@ num-bigint = { workspace = true, optional = true } num-traits = { workspace = true } unicode_names2 = { workspace = true } +multipeek = "0.1.2" unic-emoji-char = "0.9.0" unic-ucd-ident = "0.9.0" lalrpop-util = { version = "0.20.0", default-features = false } diff --git a/parser/src/string.rs b/parser/src/string.rs index 6cdc63d4..70202db2 100644 --- a/parser/src/string.rs +++ b/parser/src/string.rs @@ -11,6 +11,7 @@ use crate::{ token::{StringKind, Tok}, }; use itertools::Itertools; +use multipeek::{multipeek, MultiPeek}; use rustpython_ast::Ranged; use rustpython_parser_core::{ text_size::{TextLen, TextSize}, @@ -21,7 +22,7 @@ use rustpython_parser_core::{ const MAX_UNICODE_NAME: usize = 88; struct StringParser<'a> { - chars: std::iter::Peekable>, + chars: MultiPeek>, kind: StringKind, location: TextSize, } @@ -35,7 +36,9 @@ impl<'a> StringParser<'a> { TextSize::from(1) }; Self { - chars: source.chars().peekable(), + // use `multipeek::multipeek` instead of `chars().multipeek` from itertools because we + // want peek to be idempotent and only rarely need to peek one extra char + chars: multipeek(source.chars()), kind, location: start + offset, } @@ -53,6 +56,11 @@ impl<'a> StringParser<'a> { self.chars.peek() } + #[inline] + fn peek2(&mut self) -> Option<&char> { + self.chars.peek_nth(1) + } + #[inline] fn get_pos(&self) -> TextSize { self.location @@ -183,7 +191,9 @@ impl<'a> StringParser<'a> { let mut conversion = ConversionFlag::None; let mut self_documenting = false; let mut trailing_seq = String::new(); - let location = self.get_pos() - TextSize::from(1); + let location = self.get_pos(); + + assert_eq!(self.next_char(), Some('{')); while let Some(ch) = self.next_char() { match ch { @@ -321,7 +331,7 @@ impl<'a> StringParser<'a> { )] } else { // TODO: range is wrong but `self_documenting` needs revisiting beyond - // ranges + // ranges: https://github.com/astral-sh/ruff/issues/5970 vec![ self.expr( ast::ExprConstant { @@ -455,10 +465,10 @@ impl<'a> StringParser<'a> { while let Some(&ch) = self.peek() { match ch { '{' => { - self.next_char(); if nested == 0 { - match self.peek() { + match self.peek2() { Some('{') => { + self.next_char(); self.next_char(); content.push('{'); continue; @@ -470,13 +480,12 @@ impl<'a> StringParser<'a> { } } if !content.is_empty() { - let range = self.current_range(location).sub_end(1.into()); values.push( self.expr( ast::ExprConstant { value: content.drain(..).collect::().into(), kind: None, - range, + range: self.current_range(location), } .into(), ), From b937fe6731092d86d493053f1968a96c9252e8ee Mon Sep 17 00:00:00 2001 From: David Szotten Date: Tue, 25 Jul 2023 15:34:39 +0100 Subject: [PATCH 5/7] location -> start_location --- parser/src/string.rs | 70 +++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/parser/src/string.rs b/parser/src/string.rs index 70202db2..add4d82e 100644 --- a/parser/src/string.rs +++ b/parser/src/string.rs @@ -67,8 +67,8 @@ impl<'a> StringParser<'a> { } #[inline] - fn current_range(&self, current_location: TextSize) -> TextRange { - TextRange::new(current_location, self.location) + fn current_range(&self, start_location: TextSize) -> TextRange { + TextRange::new(start_location, self.location) } #[inline] @@ -191,7 +191,7 @@ impl<'a> StringParser<'a> { let mut conversion = ConversionFlag::None; let mut self_documenting = false; let mut trailing_seq = String::new(); - let location = self.get_pos(); + let start_location = self.get_pos(); assert_eq!(self.next_char(), Some('{')); @@ -238,14 +238,14 @@ impl<'a> StringParser<'a> { } ':' if delimiters.is_empty() => { - let location = self.get_pos(); + let start_location = self.get_pos(); let parsed_spec = self.parse_spec(nested)?; spec = Some(Box::new( self.expr( ast::ExprJoinedStr { values: parsed_spec, - range: self.current_range(location), + range: self.current_range(start_location), } .into(), ), @@ -316,16 +316,18 @@ impl<'a> StringParser<'a> { vec![self.expr( ast::ExprFormattedValue { value: Box::new( - parse_fstring_expr(&expression, location).map_err(|e| { - FStringError::new( - InvalidExpression(Box::new(e.error)), - location, - ) - })?, + parse_fstring_expr(&expression, start_location).map_err( + |e| { + FStringError::new( + InvalidExpression(Box::new(e.error)), + start_location, + ) + }, + )?, ), conversion, format_spec: spec, - range: self.current_range(location), + range: self.current_range(start_location), } .into(), )] @@ -337,7 +339,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: Constant::Str(expression.to_owned() + "="), kind: None, - range: self.current_range(location), + range: self.current_range(start_location), } .into(), ), @@ -345,19 +347,21 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: trailing_seq.into(), kind: None, - range: self.current_range(location), + range: self.current_range(start_location), } .into(), ), self.expr( ast::ExprFormattedValue { value: Box::new( - parse_fstring_expr(&expression, location).map_err(|e| { - FStringError::new( - InvalidExpression(Box::new(e.error)), - location, - ) - })?, + parse_fstring_expr(&expression, start_location).map_err( + |e| { + FStringError::new( + InvalidExpression(Box::new(e.error)), + start_location, + ) + }, + )?, ), conversion: if conversion == ConversionFlag::None && spec.is_none() @@ -367,7 +371,7 @@ impl<'a> StringParser<'a> { conversion }, format_spec: spec, - range: self.current_range(location), + range: self.current_range(start_location), } .into(), ), @@ -406,7 +410,7 @@ impl<'a> StringParser<'a> { fn parse_spec(&mut self, nested: u8) -> Result, LexicalError> { let mut spec_constructor = Vec::new(); let mut constant_piece = String::new(); - let mut location = self.get_pos(); + let mut start_location = self.get_pos(); while let Some(&next) = self.peek() { match next { '{' => { @@ -416,7 +420,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: constant_piece.drain(..).collect::().into(), kind: None, - range: self.current_range(location), + range: self.current_range(start_location), } .into(), ), @@ -424,7 +428,7 @@ impl<'a> StringParser<'a> { } let parsed_expr = self.parse_fstring(nested + 1)?; spec_constructor.extend(parsed_expr); - location = self.get_pos(); + start_location = self.get_pos(); continue; } '}' => { @@ -442,7 +446,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: constant_piece.drain(..).collect::().into(), kind: None, - range: self.current_range(location), + range: self.current_range(start_location), } .into(), ), @@ -459,7 +463,7 @@ impl<'a> StringParser<'a> { } let mut content = String::new(); - let mut location = self.get_pos(); + let mut start_location = self.get_pos(); let mut values = vec![]; while let Some(&ch) = self.peek() { @@ -485,7 +489,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: content.drain(..).collect::().into(), kind: None, - range: self.current_range(location), + range: self.current_range(start_location), } .into(), ), @@ -494,7 +498,7 @@ impl<'a> StringParser<'a> { let parsed_values = self.parse_formatted_value(nested)?; values.extend(parsed_values); - location = self.get_pos(); + start_location = self.get_pos(); } '}' => { if nested > 0 { @@ -525,7 +529,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: content.into(), kind: None, - range: self.current_range(location), + range: self.current_range(start_location), } .into(), ), @@ -537,7 +541,7 @@ impl<'a> StringParser<'a> { fn parse_bytes(&mut self) -> Result { let mut content = String::new(); - let location = self.get_pos(); + let start_location = self.get_pos(); while let Some(ch) = self.next_char() { match ch { '\\' if !self.kind.is_raw() => { @@ -561,7 +565,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: Constant::Bytes(content.chars().map(|c| c as u8).collect()), kind: None, - range: self.current_range(location), + range: self.current_range(start_location), } .into(), )) @@ -569,7 +573,7 @@ impl<'a> StringParser<'a> { fn parse_string(&mut self) -> Result { let mut content = String::new(); - let location = self.get_pos(); + let start_location = self.get_pos(); while let Some(ch) = self.next_char() { match ch { '\\' if !self.kind.is_raw() => { @@ -582,7 +586,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: Constant::Str(content), kind: self.kind.is_unicode().then(|| "u".to_string()), - range: self.current_range(location), + range: self.current_range(start_location), } .into(), )) From db221e0a5a97351781f13dacc7807ba550ba4215 Mon Sep 17 00:00:00 2001 From: David Szotten Date: Tue, 25 Jul 2023 15:36:44 +0100 Subject: [PATCH 6/7] current_range -> range --- parser/src/string.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/parser/src/string.rs b/parser/src/string.rs index add4d82e..6c918012 100644 --- a/parser/src/string.rs +++ b/parser/src/string.rs @@ -67,7 +67,7 @@ impl<'a> StringParser<'a> { } #[inline] - fn current_range(&self, start_location: TextSize) -> TextRange { + fn range(&self, start_location: TextSize) -> TextRange { TextRange::new(start_location, self.location) } @@ -245,7 +245,7 @@ impl<'a> StringParser<'a> { self.expr( ast::ExprJoinedStr { values: parsed_spec, - range: self.current_range(start_location), + range: self.range(start_location), } .into(), ), @@ -327,7 +327,7 @@ impl<'a> StringParser<'a> { ), conversion, format_spec: spec, - range: self.current_range(start_location), + range: self.range(start_location), } .into(), )] @@ -339,7 +339,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: Constant::Str(expression.to_owned() + "="), kind: None, - range: self.current_range(start_location), + range: self.range(start_location), } .into(), ), @@ -347,7 +347,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: trailing_seq.into(), kind: None, - range: self.current_range(start_location), + range: self.range(start_location), } .into(), ), @@ -371,7 +371,7 @@ impl<'a> StringParser<'a> { conversion }, format_spec: spec, - range: self.current_range(start_location), + range: self.range(start_location), } .into(), ), @@ -420,7 +420,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: constant_piece.drain(..).collect::().into(), kind: None, - range: self.current_range(start_location), + range: self.range(start_location), } .into(), ), @@ -446,7 +446,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: constant_piece.drain(..).collect::().into(), kind: None, - range: self.current_range(start_location), + range: self.range(start_location), } .into(), ), @@ -489,7 +489,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: content.drain(..).collect::().into(), kind: None, - range: self.current_range(start_location), + range: self.range(start_location), } .into(), ), @@ -529,7 +529,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: content.into(), kind: None, - range: self.current_range(start_location), + range: self.range(start_location), } .into(), ), @@ -565,7 +565,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: Constant::Bytes(content.chars().map(|c| c as u8).collect()), kind: None, - range: self.current_range(start_location), + range: self.range(start_location), } .into(), )) @@ -586,7 +586,7 @@ impl<'a> StringParser<'a> { ast::ExprConstant { value: Constant::Str(content), kind: self.kind.is_unicode().then(|| "u".to_string()), - range: self.current_range(start_location), + range: self.range(start_location), } .into(), )) From dd1b11ae13dc59583a28ef588b5a3fccf11d40c8 Mon Sep 17 00:00:00 2001 From: David Szotten Date: Tue, 25 Jul 2023 17:32:00 +0100 Subject: [PATCH 7/7] peek by cloning self.chars --- parser/Cargo.toml | 1 - parser/src/string.rs | 27 +++++++++++++-------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/parser/Cargo.toml b/parser/Cargo.toml index ed094744..b6c20ff8 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -33,7 +33,6 @@ num-bigint = { workspace = true, optional = true } num-traits = { workspace = true } unicode_names2 = { workspace = true } -multipeek = "0.1.2" unic-emoji-char = "0.9.0" unic-ucd-ident = "0.9.0" lalrpop-util = { version = "0.20.0", default-features = false } diff --git a/parser/src/string.rs b/parser/src/string.rs index 6c918012..987e0df2 100644 --- a/parser/src/string.rs +++ b/parser/src/string.rs @@ -11,7 +11,6 @@ use crate::{ token::{StringKind, Tok}, }; use itertools::Itertools; -use multipeek::{multipeek, MultiPeek}; use rustpython_ast::Ranged; use rustpython_parser_core::{ text_size::{TextLen, TextSize}, @@ -22,7 +21,7 @@ use rustpython_parser_core::{ const MAX_UNICODE_NAME: usize = 88; struct StringParser<'a> { - chars: MultiPeek>, + chars: std::str::Chars<'a>, kind: StringKind, location: TextSize, } @@ -36,9 +35,7 @@ impl<'a> StringParser<'a> { TextSize::from(1) }; Self { - // use `multipeek::multipeek` instead of `chars().multipeek` from itertools because we - // want peek to be idempotent and only rarely need to peek one extra char - chars: multipeek(source.chars()), + chars: source.chars(), kind, location: start + offset, } @@ -52,13 +49,15 @@ impl<'a> StringParser<'a> { } #[inline] - fn peek(&mut self) -> Option<&char> { - self.chars.peek() + fn peek(&mut self) -> Option { + self.chars.clone().next() } #[inline] - fn peek2(&mut self) -> Option<&char> { - self.chars.peek_nth(1) + fn peek2(&mut self) -> Option { + let mut chars = self.chars.clone(); + chars.next(); + chars.next() } #[inline] @@ -199,12 +198,12 @@ impl<'a> StringParser<'a> { match ch { // can be integrated better with the remaining code, but as a starting point ok // in general I would do here a tokenizing of the fstrings to omit this peeking. - '!' | '=' | '>' | '<' if self.peek() == Some(&'=') => { + '!' | '=' | '>' | '<' if self.peek() == Some('=') => { expression.push(ch); expression.push('='); self.next_char(); } - '!' if delimiters.is_empty() && self.peek() != Some(&'=') => { + '!' if delimiters.is_empty() && self.peek() != Some('=') => { if expression.trim().is_empty() { return Err(FStringError::new(EmptyExpression, self.get_pos()).into()); } @@ -233,7 +232,7 @@ impl<'a> StringParser<'a> { // match a python 3.8 self documenting expression // format '{' PYTHON_EXPRESSION '=' FORMAT_SPECIFIER? '}' - '=' if self.peek() != Some(&'=') && delimiters.is_empty() => { + '=' if self.peek() != Some('=') && delimiters.is_empty() => { self_documenting = true; } @@ -411,7 +410,7 @@ impl<'a> StringParser<'a> { let mut spec_constructor = Vec::new(); let mut constant_piece = String::new(); let mut start_location = self.get_pos(); - while let Some(&next) = self.peek() { + while let Some(next) = self.peek() { match next { '{' => { if !constant_piece.is_empty() { @@ -466,7 +465,7 @@ impl<'a> StringParser<'a> { let mut start_location = self.get_pos(); let mut values = vec![]; - while let Some(&ch) = self.peek() { + while let Some(ch) = self.peek() { match ch { '{' => { if nested == 0 {