Skip to content

Commit 2bb72a4

Browse files
committed
docs and fixes
1 parent ce8b35c commit 2bb72a4

File tree

5 files changed

+80
-11
lines changed

5 files changed

+80
-11
lines changed

README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,23 @@ similar semantics are represented with the same AST. We welcome PRs to fix such
100100
issues and distinguish different syntaxes in the AST.
101101

102102

103+
## WIP: Extracting source locations from AST nodes
104+
105+
This crate allows recovering source locations from AST nodes via the [Spanned](https://docs.rs/sqlparser/latest/sqlparser/ast/trait.Spanned.html) trait, which can be used for advanced diagnostics tooling. Node that this feature is a work in progress and many nodes report missing or inaccurate spans. Please see [this document](./docs/source_spans.md#source-span-contributing-guidelines) for information on how to contribute missing improvements.
106+
107+
```rust
108+
use sqlparser::ast::Spanned;
109+
110+
// Parse SQL
111+
let ast = Parser::parse_sql(&GenericDialect, "SELECT A FROM B").unwrap();
112+
113+
// The source span can be retrieved with start and end locations
114+
assert_eq!(ast[0].span(), Span {
115+
start: Location::of(1, 1),
116+
end: Location::of(1, 16),
117+
});
118+
```
119+
103120
## SQL compliance
104121

105122
SQL was first standardized in 1987, and revisions of the standard have been

docs/source_spans.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
2+
## Breaking Changes
3+
4+
These are the current breaking changes introduced by the source spans feature:
5+
6+
#### Added fields for spans (must be added to any existing pattern matches)
7+
- `Ident` now stores a `Span`
8+
- `Select`, `With`, `Cte`, `WildcardAdditionalOptions` now store a `TokenWithLocation`
9+
10+
#### Misc.
11+
- `TokenWithLocation` stores a full `Span`, rathern than just a source location. Users relying on `token.location` should use `token.location.start` instead.
12+
## Source Span Contributing Guidelines
13+
14+
For contributing source spans improvement in addition to the general [contribution guidelines](../README.md#contributing), please make sure to pay attention to the following:
15+
16+
17+
### Source Span Design Considerations
18+
19+
- `Ident` always have correct source spans
20+
- Downstream breaking change impact is to be as minimal as possible
21+
- To this end, use recursive merging of spans in favor of storing spans on all nodes
22+
- Any metadata added to compute spans must not change semantics (Eq, Ord, Hash, etc.)
23+
24+
The primary reason for missing and inaccurate source spans at this time is missing spans of keyword tokens and values in many structures, either due to lack of time or because adding them would break downstream significantly.
25+
26+
When considering adding support for source spans on a type, consider the impact to consumers of that type and whether your change would require a consumer to do non-trivial changes to their code.
27+
28+
f.e. of a trivial change
29+
```rust
30+
match node {
31+
ast::Query {
32+
field1,
33+
field2,
34+
location: _, // add a new line to ignored location
35+
}
36+
```
37+
38+
If adding source spans to a type would require a significant change like wrapping that type or similar, please open an issue to discuss.
39+
40+
### AST Node Equality and Hashes
41+
42+
When adding tokens to AST nodes, make sure to wrap them in the [IgnoreField](https://docs.rs/sqlparser/latest/sqlparser/ast/helpers/struct.IgnoreField.html) helper to ensure that semantically equivalent AST nodes always compare as equal and hash to the same value. F.e. `select 5` and `SELECT 5` would compare as different `Select` nodes, if the select token was stored directly. f.e.
43+
44+
```rust
45+
struct Select {
46+
select_token: IgnoreField<TokenWithLocation>, // only used for spans
47+
/// remaining fields
48+
field1,
49+
field2,
50+
...
51+
}
52+
```

src/ast/query.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ impl fmt::Display for NamedWindowDefinition {
511511
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
512512
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
513513
pub struct With {
514-
pub with_token: TokenWithLocation,
514+
pub with_token: IgnoreField<TokenWithLocation>,
515515
pub recursive: bool,
516516
pub cte_tables: Vec<Cte>,
517517
}
@@ -564,7 +564,7 @@ pub struct Cte {
564564
pub from: Option<Ident>,
565565
pub materialized: Option<CteAsMaterialized>,
566566
// needed for accurate span reporting
567-
pub closing_paren_token: TokenWithLocation,
567+
pub closing_paren_token: IgnoreField<TokenWithLocation>,
568568
}
569569

570570
impl fmt::Display for Cte {
@@ -620,7 +620,7 @@ impl fmt::Display for IdentWithAlias {
620620
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
621621
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
622622
pub struct WildcardAdditionalOptions {
623-
pub wildcard_token: TokenWithLocation,
623+
pub wildcard_token: IgnoreField<TokenWithLocation>,
624624
/// `[ILIKE...]`.
625625
/// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
626626
pub opt_ilike: Option<IlikeSelectItem>,
@@ -641,7 +641,7 @@ pub struct WildcardAdditionalOptions {
641641
impl Default for WildcardAdditionalOptions {
642642
fn default() -> Self {
643643
Self {
644-
wildcard_token: TokenWithLocation::wrap(Token::Mul),
644+
wildcard_token: TokenWithLocation::wrap(Token::Mul).into(),
645645
opt_ilike: None,
646646
opt_exclude: None,
647647
opt_except: None,

src/ast/spans.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ impl Spanned for With {
100100
} = self;
101101

102102
union_spans(
103-
core::iter::once(with_token.span).chain(cte_tables.iter().map(|item| item.span())),
103+
core::iter::once(with_token.0.span).chain(cte_tables.iter().map(|item| item.span())),
104104
)
105105
}
106106
}
@@ -119,7 +119,7 @@ impl Spanned for Cte {
119119
core::iter::once(alias.span())
120120
.chain(core::iter::once(query.span()))
121121
.chain(from.iter().map(|item| item.span))
122-
.chain(core::iter::once(closing_paren_token.span)),
122+
.chain(core::iter::once(closing_paren_token.0.span)),
123123
)
124124
}
125125
}
@@ -1547,7 +1547,7 @@ impl Spanned for WildcardAdditionalOptions {
15471547
} = self;
15481548

15491549
union_spans(
1550-
core::iter::once(wildcard_token.span)
1550+
core::iter::once(wildcard_token.0.span)
15511551
.chain(opt_ilike.as_ref().map(|i| i.span()))
15521552
.chain(opt_exclude.as_ref().map(|i| i.span()))
15531553
.chain(opt_rename.as_ref().map(|i| i.span()))

src/parser/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8799,7 +8799,7 @@ impl<'a> Parser<'a> {
87998799
let _guard = self.recursion_counter.try_decrease()?;
88008800
let with = if let Some(with_token) = self.parse_keyword_token(Keyword::WITH) {
88018801
Some(With {
8802-
with_token,
8802+
with_token: with_token.into(),
88038803
recursive: self.parse_keyword(Keyword::RECURSIVE),
88048804
cte_tables: self.parse_comma_separated(Parser::parse_cte)?,
88058805
})
@@ -9070,7 +9070,7 @@ impl<'a> Parser<'a> {
90709070
query,
90719071
from: None,
90729072
materialized: is_materialized,
9073-
closing_paren_token,
9073+
closing_paren_token: closing_paren_token.into(),
90749074
}
90759075
} else {
90769076
let columns = self.parse_parenthesized_column_list(Optional, false)?;
@@ -9094,7 +9094,7 @@ impl<'a> Parser<'a> {
90949094
query,
90959095
from: None,
90969096
materialized: is_materialized,
9097-
closing_paren_token,
9097+
closing_paren_token: closing_paren_token.into(),
90989098
}
90999099
};
91009100
if self.parse_keyword(Keyword::FROM) {
@@ -11365,7 +11365,7 @@ impl<'a> Parser<'a> {
1136511365
};
1136611366

1136711367
Ok(WildcardAdditionalOptions {
11368-
wildcard_token,
11368+
wildcard_token: wildcard_token.into(),
1136911369
opt_ilike,
1137011370
opt_exclude,
1137111371
opt_except,

0 commit comments

Comments
 (0)