Skip to content
This repository was archived by the owner on Jul 27, 2023. It is now read-only.

Commit f39a6de

Browse files
charliermarshzanieb
authored andcommitted
Add TextRange to Identifier (#8)
This PR adds `TextRange` to `Identifier`. Right now, the AST only includes ranges for identifiers in certain cases (`Expr::Name`, `Keyword`, etc.), namely when the identifier comprises an entire AST node. In Ruff, we do additional ad-hoc lexing to extract identifiers from source code. One frequent example: given a function `def f(): ...`, we lex to find the range of `f`, for use in diagnostics. Another: `except ValueError as e`, for which the AST doesn't include a range for `e`. Note that, as an optimization, we avoid storing the `TextRange` for `Expr::Name`, since it's already included.
1 parent 037dc28 commit f39a6de

File tree

86 files changed

+4451
-4975
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

86 files changed

+4451
-4975
lines changed

ast/asdl_rs.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,12 @@ def visitField(self, field, parent, vis, depth, constructor=None):
630630
if typ == "Int":
631631
typ = BUILTIN_INT_NAMES.get(field.name, typ)
632632
name = rust_field(field.name)
633+
634+
# Use a String, rather than an Identifier, for the `id` field of `Expr::Name`.
635+
# Names already include a range, so there's no need to duplicate the span.
636+
if name == "id":
637+
typ = "String"
638+
633639
self.emit(f"{vis}{name}: {typ},", depth)
634640

635641
def visitProduct(self, product, type, depth):

ast/src/builtin.rs

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,87 @@
11
//! `builtin_types` in asdl.py and Attributed
22
3+
use rustpython_parser_core::text_size::TextRange;
4+
35
use crate::bigint::BigInt;
6+
use crate::Ranged;
47

58
pub type String = std::string::String;
69

710
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
8-
pub struct Identifier(String);
11+
pub struct Identifier {
12+
id: String,
13+
range: TextRange,
14+
}
915

1016
impl Identifier {
1117
#[inline]
12-
pub fn new(s: impl Into<String>) -> Self {
13-
Self(s.into())
18+
pub fn new(id: impl Into<String>, range: TextRange) -> Self {
19+
Self {
20+
id: id.into(),
21+
range,
22+
}
1423
}
1524
}
1625

1726
impl Identifier {
1827
#[inline]
1928
pub fn as_str(&self) -> &str {
20-
self.0.as_str()
29+
self.id.as_str()
2130
}
2231
}
2332

24-
impl std::cmp::PartialEq<str> for Identifier {
33+
impl PartialEq<str> for Identifier {
2534
#[inline]
2635
fn eq(&self, other: &str) -> bool {
27-
self.0 == other
36+
self.id == other
2837
}
2938
}
3039

31-
impl std::cmp::PartialEq<String> for Identifier {
40+
impl PartialEq<String> for Identifier {
3241
#[inline]
3342
fn eq(&self, other: &String) -> bool {
34-
&self.0 == other
43+
&self.id == other
3544
}
3645
}
3746

3847
impl std::ops::Deref for Identifier {
3948
type Target = str;
4049
#[inline]
4150
fn deref(&self) -> &Self::Target {
42-
self.0.as_str()
51+
self.id.as_str()
4352
}
4453
}
4554

4655
impl AsRef<str> for Identifier {
4756
#[inline]
4857
fn as_ref(&self) -> &str {
49-
self.0.as_str()
58+
self.id.as_str()
5059
}
5160
}
5261

5362
impl AsRef<String> for Identifier {
5463
#[inline]
5564
fn as_ref(&self) -> &String {
56-
&self.0
65+
&self.id
5766
}
5867
}
5968

6069
impl std::fmt::Display for Identifier {
6170
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62-
self.0.fmt(f)
71+
self.id.fmt(f)
6372
}
6473
}
6574

6675
impl From<Identifier> for String {
6776
#[inline]
68-
fn from(id: Identifier) -> String {
69-
id.0
77+
fn from(identifier: Identifier) -> String {
78+
identifier.id
7079
}
7180
}
7281

73-
impl From<String> for Identifier {
74-
#[inline]
75-
fn from(id: String) -> Self {
76-
Self(id)
77-
}
78-
}
79-
80-
impl<'a> From<&'a str> for Identifier {
81-
#[inline]
82-
fn from(id: &'a str) -> Identifier {
83-
id.to_owned().into()
82+
impl Ranged for Identifier {
83+
fn range(&self) -> TextRange {
84+
self.range
8485
}
8586
}
8687

@@ -207,6 +208,7 @@ impl std::fmt::Display for Constant {
207208
#[cfg(test)]
208209
mod tests {
209210
use super::*;
211+
210212
#[test]
211213
fn test_is_macro() {
212214
let none = Constant::None;

ast/src/gen/generic.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1607,7 +1607,7 @@ impl<R> From<ExprStarred<R>> for Ast<R> {
16071607
#[derive(Clone, Debug, PartialEq)]
16081608
pub struct ExprName<R = TextRange> {
16091609
pub range: R,
1610-
pub id: Identifier,
1610+
pub id: String,
16111611
pub ctx: ExprContext,
16121612
}
16131613

ast/src/impls.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,4 @@ static_assertions::assert_eq_size!(crate::Stmt, [u8; 160]);
6161
#[cfg(target_arch = "x86_64")]
6262
static_assertions::assert_eq_size!(crate::Pattern, [u8; 96]);
6363
#[cfg(target_arch = "x86_64")]
64-
static_assertions::assert_eq_size!(crate::ExceptHandler, [u8; 64]);
64+
static_assertions::assert_eq_size!(crate::ExceptHandler, [u8; 72]);

parser/src/function.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ pub(crate) fn parse_args(func_args: Vec<FunctionArgument>) -> Result<ArgumentLis
9090
Some((start, end, name)) => {
9191
// Check for duplicate keyword arguments in the call.
9292
if let Some(keyword_name) = &name {
93-
if !keyword_names.insert(keyword_name.clone()) {
93+
if !keyword_names.insert(keyword_name.to_string()) {
9494
return Err(LexicalError {
9595
error: LexicalErrorType::DuplicateKeywordArgumentError(
9696
keyword_name.to_string(),
@@ -103,7 +103,7 @@ pub(crate) fn parse_args(func_args: Vec<FunctionArgument>) -> Result<ArgumentLis
103103
}
104104

105105
keywords.push(ast::Keyword {
106-
arg: name.map(ast::Identifier::new),
106+
arg: name.map(|name| ast::Identifier::new(name, TextRange::new(start, end))),
107107
value,
108108
range: TextRange::new(start, end),
109109
});

parser/src/parser.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,10 @@ impl Parse for ast::Identifier {
213213
) -> Result<Self, ParseError> {
214214
let expr = ast::Expr::parse_tokens(lxr, source_path)?;
215215
match expr {
216-
ast::Expr::Name(name) => Ok(name.id),
216+
ast::Expr::Name(name) => {
217+
let range = name.range();
218+
Ok(ast::Identifier::new(name.id, range))
219+
}
217220
expr => Err(ParseError {
218221
error: ParseErrorType::InvalidToken,
219222
offset: expr.range().start(),

parser/src/python.lalrpop

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ ImportAsNames: Vec<ast::Alias> = {
267267
<location:@L> "(" <i:OneOrMore<ImportAsAlias<Identifier>>> ","? ")" <end_location:@R> => i,
268268
<location:@L> "*" <end_location:@R> => {
269269
// Star import all
270-
vec![ast::Alias { name: ast::Identifier::new("*"), asname: None, range: (location..end_location).into() }]
270+
vec![ast::Alias { name: ast::Identifier::new("*", (location..end_location).into()), asname: None, range: (location..end_location).into() }]
271271
},
272272
};
273273

@@ -279,14 +279,14 @@ ImportAsAlias<I>: ast::Alias = {
279279

280280
// A name like abc or abc.def.ghi
281281
DottedName: ast::Identifier = {
282-
<n:name> => ast::Identifier::new(n),
283-
<n:name> <n2: ("." Identifier)+> => {
282+
<location:@L> <n:name> <end_location:@R> => ast::Identifier::new(n, (location..end_location).into()),
283+
<location:@L> <n:name> <n2: ("." Identifier)+> <end_location:@R> => {
284284
let mut r = n.to_string();
285285
for x in n2 {
286286
r.push('.');
287287
r.push_str(x.1.as_str());
288288
}
289-
ast::Identifier::new(r)
289+
ast::Identifier::new(r, (location..end_location).into())
290290
},
291291
};
292292

@@ -564,8 +564,8 @@ CapturePattern: ast::Pattern = {
564564
}
565565

566566
MatchName: ast::Expr = {
567-
<location:@L> <name:Identifier> <end_location:@R> => ast::Expr::Name(
568-
ast::ExprName { id: name, ctx: ast::ExprContext::Load, range: (location..end_location).into() },
567+
<location:@L> <id:Identifier> <end_location:@R> => ast::Expr::Name(
568+
ast::ExprName { id: id.into(), ctx: ast::ExprContext::Load, range: (location..end_location).into() },
569569
),
570570
}
571571

@@ -981,7 +981,7 @@ FuncDef: ast::Stmt = {
981981

982982
TypeAliasName: ast::Expr = {
983983
<location:@L> <name:Identifier> <end_location:@R> => ast::Expr::Name(
984-
ast::ExprName { id: name, ctx: ast::ExprContext::Load, range: (location..end_location).into() },
984+
ast::ExprName { id: name.into(), ctx: ast::ExprContext::Load, range: (location..end_location).into() },
985985
),
986986
}
987987

@@ -1235,7 +1235,7 @@ NamedExpression: ast::Expr = {
12351235
ast::Expr::NamedExpr(
12361236
ast::ExprNamedExpr {
12371237
target: Box::new(ast::Expr::Name(
1238-
ast::ExprName { id, ctx: ast::ExprContext::Store, range: (location..end_location).into() },
1238+
ast::ExprName { id: id.into(), ctx: ast::ExprContext::Store, range: (location..end_location).into() },
12391239
)),
12401240
range: (location..value.end()).into(),
12411241
value: Box::new(value),
@@ -1451,8 +1451,8 @@ Atom<Goal>: ast::Expr = {
14511451
<location:@L> <value:Constant> <end_location:@R> => ast::Expr::Constant(
14521452
ast::ExprConstant { value, kind: None, range: (location..end_location).into() }
14531453
),
1454-
<location:@L> <name:Identifier> <end_location:@R> => ast::Expr::Name(
1455-
ast::ExprName { id: name, ctx: ast::ExprContext::Load, range: (location..end_location).into() }
1454+
<location:@L> <id:Identifier> <end_location:@R> => ast::Expr::Name(
1455+
ast::ExprName { id: id.into(), ctx: ast::ExprContext::Load, range: (location..end_location).into() }
14561456
),
14571457
<location:@L> "[" <e:ListLiteralValues?> "]"<end_location:@R> => {
14581458
let elts = e.unwrap_or_default();
@@ -1687,7 +1687,7 @@ Constant: ast::Constant = {
16871687
};
16881688

16891689
Identifier: ast::Identifier = {
1690-
<s:name> => ast::Identifier::new(s)
1690+
<location:@L> <s:name> <end_location:@R> => ast::Identifier::new(s, (location..end_location).into())
16911691
};
16921692

16931693
// Hook external lexer:

0 commit comments

Comments
 (0)