From 50032f298b2815e41fa7ca1cea41562fd0e804ee Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Fri, 22 Sep 2023 12:26:44 +0300
Subject: [PATCH 01/16] feat: implement the non-proc macro version of
 resolve_tokens

---
 crates/parser/src/get_children_codegen.rs |  30 +++++
 crates/parser/src/get_location_codegen.rs |   3 +
 crates/parser/src/lib.rs                  |   3 +
 crates/parser/src/resolve_tokens.rs       | 141 ++++++++++++++++++++++
 crates/parser/src/statement_parser.rs     |   9 +-
 5 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100644 crates/parser/src/get_children_codegen.rs
 create mode 100644 crates/parser/src/get_location_codegen.rs
 create mode 100644 crates/parser/src/resolve_tokens.rs
diff --git a/crates/parser/src/get_children_codegen.rs b/crates/parser/src/get_children_codegen.rs
new file mode 100644
index 00000000..13b895f2
--- /dev/null
+++ b/crates/parser/src/get_children_codegen.rs
@@ -0,0 +1,30 @@
+use codegen::get_children;
+
+get_children!();
+
+#[cfg(test)]
+mod tests {
+    use crate::get_children_codegen::get_children;
+
+    #[test]
+    fn test_get_children() {
+        let input = "with c as (insert into contact (id) values ('id')) select * from c;";
+
+        let pg_query_root = match pg_query::parse(input) {
+            Ok(parsed) => Some(
+                parsed
+                    .protobuf
+                    .nodes()
+                    .iter()
+                    .find(|n| n.1 == 1)
+                    .unwrap()
+                    .0
+                    .to_enum(),
+            ),
+            Err(_) => None,
+        };
+
+        let children = get_children(&pg_query_root.unwrap(), input.to_string(), 1);
+        assert_eq!(children.len(), 13);
+    }
+}
diff --git a/crates/parser/src/get_location_codegen.rs b/crates/parser/src/get_location_codegen.rs
new file mode 100644
index 00000000..fcc6685d
--- /dev/null
+++ b/crates/parser/src/get_location_codegen.rs
@@ -0,0 +1,3 @@
+use codegen::get_location;
+
+get_location!();
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index 90b2f9a2..8be85d21 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -16,7 +16,10 @@
 //! To see how these drawbacks are mitigated, see the `statement.rs` and the `source_file.rs` module.
 
 mod ast_node;
+mod get_children_codegen;
+mod get_location_codegen;
 mod parser;
+mod resolve_tokens;
 mod sibling_token;
 mod source_parser;
 mod statement_parser;
diff --git a/crates/parser/src/resolve_tokens.rs b/crates/parser/src/resolve_tokens.rs
new file mode 100644
index 00000000..d2dde198
--- /dev/null
+++ b/crates/parser/src/resolve_tokens.rs
@@ -0,0 +1,141 @@
+use crate::get_children_codegen::ChildrenNode;
+use crate::get_location_codegen::get_location;
+use cstree::text::{TextRange, TextSize};
+use pg_query::{protobuf::ScanToken, NodeEnum};
+
+// all tokens of a node beneath it
+// get estimation for each node location from tokens
+// and also node range
+//
+// how to handle tokens that cannot be put beneath node based on the ast?
+// pass token -> if not beneath current node, apply immediately
+
+#[derive(Debug, Clone)]
+pub struct NestedNode {
+    pub node: NodeEnum,
+    pub depth: i32,
+    pub path: String,
+    pub tokens: Vec<i32>,
+    pub range: TextRange,
+}
+
+/// Turns a `Vec<ChildrenNode>` into a `Vec<NestedNode>` by adding `tokens` and `range` to each node.
+///
+/// For each node, we walk all properties and search for tokens that match the property value. The
+/// token that is closest to the node or a parent is used.
+///
+/// The node range is the minimum start and maximum end of all tokens.
+pub fn resolve_tokens(
+    children: &Vec<ChildrenNode>,
+    tokens: &Vec<ScanToken>,
+    text: &str,
+) -> Vec<NestedNode> {
+    children
+        .iter()
+        .map(|c| {
+            let nearest_parent_location = get_nearest_parent_location(&c, children);
+            let furthest_child_location = get_furthest_child_location(&c, children);
+
+            let mut child_tokens = Vec::new();
+
+            let mut find_token = |property: String| {
+                child_tokens.push(
+                    tokens
+                        .iter()
+                        .filter_map(|t| {
+                            if get_token_text(
+                                usize::try_from(t.start).unwrap(),
+                                usize::try_from(t.end).unwrap(),
+                                text,
+                            ) != property
+                            {
+                                return None;
+                            }
+
+                            if furthest_child_location.is_some()
+                                && furthest_child_location.unwrap() < t.start as i32
+                            {
+                                return None;
+                            }
+
+                            let distance = t.start - nearest_parent_location;
+                            if distance > 0 {
+                                Some((distance, t))
+                            } else {
+                                None
+                            }
+                        })
+                        .min_by_key(|(d, _)| d.to_owned())
+                        .map(|(_, t)| t)
+                        .unwrap(),
+                );
+            };
+
+            match &c.node {
+                NodeEnum::RangeVar(n) => {
+                    find_token(n.relname.to_owned());
+                }
+                _ => {}
+            };
+
+            NestedNode {
+                node: c.node.to_owned(),
+                depth: c.depth,
+                path: c.path.to_owned(),
+                tokens: child_tokens.iter().map(|t| t.token).collect(),
+                range: TextRange::new(
+                    TextSize::from(
+                        child_tokens.iter().min_by_key(|t| t.start).unwrap().start as u32,
+                    ),
+                    TextSize::from(child_tokens.iter().max_by_key(|t| t.end).unwrap().end as u32),
+                ),
+            }
+        })
+        .collect()
+}
+
+fn get_token_text(start: usize, end: usize, text: &str) -> String {
+    text.chars()
+        .skip(start)
+        .take(end - start)
+        .collect::<String>()
+}
+
+fn get_furthest_child_location(c: &ChildrenNode, children: &Vec<ChildrenNode>) -> Option<i32> {
+    children
+        .iter()
+        .filter_map(|n| {
+            if !n.path.starts_with(c.path.as_str()) {
+                return None;
+            }
+            get_location(&n.node)
+        })
+        .max()
+}
+
+fn get_nearest_parent_location(n: &ChildrenNode, children: &Vec<ChildrenNode>) -> i32 {
+    // if location is set, return it
+    let location = get_location(&n.node);
+    if location.is_some() {
+        return location.unwrap();
+    }
+
+    // go up in the tree and check if location exists on any parent
+    let mut path_elements = n.path.split(".").collect::<Vec<&str>>();
+    path_elements.pop();
+    while path_elements.len() > 0 {
+        let parent_path = path_elements.join(".");
+        let node = children.iter().find(|c| c.path == parent_path);
+        if node.is_some() {
+            let location = get_location(&node.unwrap().node);
+            if location.is_some() {
+                return location.unwrap();
+            }
+        }
+
+        path_elements.pop();
+    }
+
+    // fallback to 0
+    return 0;
+}
diff --git a/crates/parser/src/statement_parser.rs b/crates/parser/src/statement_parser.rs
index d0f6a25c..68644702 100644
--- a/crates/parser/src/statement_parser.rs
+++ b/crates/parser/src/statement_parser.rs
@@ -1,7 +1,7 @@
 use cstree::text::{TextRange, TextSize};
 use logos::{Logos, Span};
 
-use crate::{parser::Parser, syntax_kind_codegen::SyntaxKind};
+use crate::{get_children_codegen::get_children, parser::Parser, syntax_kind_codegen::SyntaxKind};
 
 /// A super simple lexer for sql statements.
 ///
@@ -83,6 +83,13 @@ impl Parser {
             }
         };
 
+        let mut pg_query_nodes = match &pg_query_root {
+            Some(root) => get_children(root, text.to_string(), 1)
+                .into_iter()
+                .peekable(),
+            None => Vec::new().into_iter().peekable(),
+        };
+
         let mut lexer = StatementToken::lexer(&text);
 
         // parse root node if no syntax errors

From 06b5836254f0c74b01d3dfebf288f6efde7c966c Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Sun, 24 Sep 2023 18:51:04 +0300
Subject: [PATCH 02/16] feat: resolve tokens

---
 crates/parser/src/resolve_tokens.rs   | 23 ++++++++---------------
 crates/parser/src/statement_parser.rs | 21 +++++++++++++++------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/crates/parser/src/resolve_tokens.rs b/crates/parser/src/resolve_tokens.rs
index d2dde198..93a8a877 100644
--- a/crates/parser/src/resolve_tokens.rs
+++ b/crates/parser/src/resolve_tokens.rs
@@ -3,18 +3,11 @@ use crate::get_location_codegen::get_location;
 use cstree::text::{TextRange, TextSize};
 use pg_query::{protobuf::ScanToken, NodeEnum};
 
-// all tokens of a node beneath it
-// get estimation for each node location from tokens
-// and also node range
-//
-// how to handle tokens that cannot be put beneath node based on the ast?
-// pass token -> if not beneath current node, apply immediately
-
 #[derive(Debug, Clone)]
 pub struct NestedNode {
-    pub node: NodeEnum,
-    pub depth: i32,
-    pub path: String,
+    pub id: usize,
+    pub inner: ChildrenNode,
+    // .start property of `ScanToken`
     pub tokens: Vec<i32>,
     pub range: TextRange,
 }
@@ -32,7 +25,8 @@ pub fn resolve_tokens(
 ) -> Vec<NestedNode> {
     children
         .iter()
-        .map(|c| {
+        .enumerate()
+        .map(|(idx, c)| {
             let nearest_parent_location = get_nearest_parent_location(&c, children);
             let furthest_child_location = get_furthest_child_location(&c, children);
 
@@ -79,10 +73,9 @@ pub fn resolve_tokens(
             };
 
             NestedNode {
-                node: c.node.to_owned(),
-                depth: c.depth,
-                path: c.path.to_owned(),
-                tokens: child_tokens.iter().map(|t| t.token).collect(),
+                id: idx,
+                inner: c.to_owned(),
+                tokens: child_tokens.iter().map(|t| t.start).collect(),
                 range: TextRange::new(
                     TextSize::from(
                         child_tokens.iter().min_by_key(|t| t.start).unwrap().start as u32,
diff --git a/crates/parser/src/statement_parser.rs b/crates/parser/src/statement_parser.rs
index 68644702..fde95be8 100644
--- a/crates/parser/src/statement_parser.rs
+++ b/crates/parser/src/statement_parser.rs
@@ -1,7 +1,10 @@
 use cstree::text::{TextRange, TextSize};
 use logos::{Logos, Span};
 
-use crate::{get_children_codegen::get_children, parser::Parser, syntax_kind_codegen::SyntaxKind};
+use crate::{
+    get_children_codegen::get_children, parser::Parser, resolve_tokens::resolve_tokens,
+    syntax_kind_codegen::SyntaxKind,
+};
 
 /// A super simple lexer for sql statements.
 ///
@@ -57,10 +60,10 @@ impl Parser {
         );
 
         let mut pg_query_tokens = match pg_query::scan(text) {
-            Ok(scanned) => scanned.tokens.into_iter().peekable(),
+            Ok(scanned) => scanned.tokens,
             Err(e) => {
                 self.error(e.to_string(), range);
-                Vec::new().into_iter().peekable()
+                Vec::new()
             }
         };
 
@@ -84,12 +87,18 @@ impl Parser {
         };
 
         let mut pg_query_nodes = match &pg_query_root {
-            Some(root) => get_children(root, text.to_string(), 1)
-                .into_iter()
-                .peekable(),
+            Some(root) => resolve_tokens(
+                &get_children(root, text.to_string(), 1),
+                &pg_query_tokens,
+                &text,
+            )
+            .into_iter()
+            .peekable(),
             None => Vec::new().into_iter().peekable(),
         };
 
+        let mut pg_query_tokens = pg_query_tokens.iter().peekable();
+
         let mut lexer = StatementToken::lexer(&text);
 
         // parse root node if no syntax errors

From 2b7729d47691a6e5d3ab3c3edc89c284449cb918 Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Sun, 1 Oct 2023 16:31:48 +0200
Subject: [PATCH 03/16] feat: refactor and improve all over the place

---
 .../src/{get_children.rs => get_nodes.rs}     |  12 +-
 crates/codegen/src/lib.rs                     |   8 +-
 ...ildren_codegen.rs => get_nodes_codegen.rs} |  12 +-
 crates/parser/src/lib.rs                      |   4 +-
 crates/parser/src/parser.rs                   |  72 +---
 crates/parser/src/resolve_tokens.rs           | 185 +++++----
 crates/parser/src/sibling_token.rs            |  31 --
 crates/parser/src/source_parser.rs            |  17 +-
 crates/parser/src/statement_parser.rs         | 380 ++++++++++++++----
 9 files changed, 429 insertions(+), 292 deletions(-)
 rename crates/codegen/src/{get_children.rs => get_nodes.rs} (92%)
 rename crates/parser/src/{get_children_codegen.rs => get_nodes_codegen.rs} (67%)
 delete mode 100644 crates/parser/src/sibling_token.rs

diff --git a/crates/codegen/src/get_children.rs b/crates/codegen/src/get_nodes.rs
similarity index 92%
rename from crates/codegen/src/get_children.rs
rename to crates/codegen/src/get_nodes.rs
index e92c5f6e..26fd5a53 100644
--- a/crates/codegen/src/get_children.rs
+++ b/crates/codegen/src/get_nodes.rs
@@ -2,7 +2,7 @@ use pg_query_proto_parser::{FieldType, Node, ProtoParser};
 use proc_macro2::{Ident, TokenStream};
 use quote::{format_ident, quote};
 
-pub fn get_children_mod(_item: proc_macro2::TokenStream) -> proc_macro2::TokenStream {
+pub fn get_nodes_mod(_item: proc_macro2::TokenStream) -> proc_macro2::TokenStream {
     let parser = ProtoParser::new("./libpg_query/protobuf/pg_query.proto");
     let proto_file = parser.parse();
 
@@ -16,7 +16,7 @@ pub fn get_children_mod(_item: proc_macro2::TokenStream) -> proc_macro2::TokenSt
         use std::collections::VecDeque;
 
         #[derive(Debug, Clone)]
-        pub struct ChildrenNode {
+        pub struct Node {
             pub node: NodeEnum,
             pub depth: i32,
             pub path: String,
@@ -24,8 +24,10 @@ pub fn get_children_mod(_item: proc_macro2::TokenStream) -> proc_macro2::TokenSt
 
         /// Returns all children of the node, recursively
         /// location is resolved manually
-        pub fn get_children(node: &NodeEnum, text: String, current_depth: i32) -> Vec<ChildrenNode> {
-            let mut nodes: Vec<ChildrenNode> = vec![];
+        pub fn get_nodes(node: &NodeEnum, text: String, current_depth: i32) -> Vec<Node> {
+            let mut nodes: Vec<Node> = vec![
+                Node { node: node.to_owned(), depth: current_depth, path: "0".to_string() }
+            ];
             // Node, depth, path
             let mut stack: VecDeque<(NodeEnum, i32, String)> =
                 VecDeque::from(vec![(node.to_owned(), current_depth, "0".to_string())]);
@@ -37,7 +39,7 @@ pub fn get_children_mod(_item: proc_macro2::TokenStream) -> proc_macro2::TokenSt
                     let path = path.clone() + "." + child_ctr.to_string().as_str();
                     child_ctr = child_ctr + 1;
                     stack.push_back((c.to_owned(), current_depth, path.clone()));
-                    nodes.push(ChildrenNode {
+                    nodes.push(Node {
                         node: c,
                         depth: current_depth,
                         path: path.clone(),
diff --git a/crates/codegen/src/lib.rs b/crates/codegen/src/lib.rs
index fba42ea7..bc63d4f6 100644
--- a/crates/codegen/src/lib.rs
+++ b/crates/codegen/src/lib.rs
@@ -1,14 +1,14 @@
-mod get_children;
 mod get_location;
+mod get_nodes;
 mod syntax_kind;
 
-use get_children::get_children_mod;
 use get_location::get_location_mod;
+use get_nodes::get_nodes_mod;
 use syntax_kind::syntax_kind_mod;
 
 #[proc_macro]
-pub fn get_children(item: proc_macro::TokenStream) -> proc_macro::TokenStream {
-    get_children_mod(item.into()).into()
+pub fn get_nodes(item: proc_macro::TokenStream) -> proc_macro::TokenStream {
+    get_nodes_mod(item.into()).into()
 }
 
 #[proc_macro]
diff --git a/crates/parser/src/get_children_codegen.rs b/crates/parser/src/get_nodes_codegen.rs
similarity index 67%
rename from crates/parser/src/get_children_codegen.rs
rename to crates/parser/src/get_nodes_codegen.rs
index 13b895f2..3305baab 100644
--- a/crates/parser/src/get_children_codegen.rs
+++ b/crates/parser/src/get_nodes_codegen.rs
@@ -1,13 +1,13 @@
-use codegen::get_children;
+use codegen::get_nodes;
 
-get_children!();
+get_nodes!();
 
 #[cfg(test)]
 mod tests {
-    use crate::get_children_codegen::get_children;
+    use crate::get_nodes_codegen::get_nodes;
 
     #[test]
-    fn test_get_children() {
+    fn test_get_nodes() {
         let input = "with c as (insert into contact (id) values ('id')) select * from c;";
 
         let pg_query_root = match pg_query::parse(input) {
@@ -24,7 +24,7 @@ mod tests {
             Err(_) => None,
         };
 
-        let children = get_children(&pg_query_root.unwrap(), input.to_string(), 1);
-        assert_eq!(children.len(), 13);
+        let nodes = get_nodes(&pg_query_root.unwrap(), input.to_string(), 1);
+        assert_eq!(nodes.len(), 14);
     }
 }
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index 8be85d21..5344ff27 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -16,17 +16,15 @@
 //! To see how these drawbacks are mitigated, see the `statement.rs` and the `source_file.rs` module.
 
 mod ast_node;
-mod get_children_codegen;
 mod get_location_codegen;
+mod get_nodes_codegen;
 mod parser;
 mod resolve_tokens;
-mod sibling_token;
 mod source_parser;
 mod statement_parser;
 mod syntax_error;
 mod syntax_kind_codegen;
 mod syntax_node;
 
-pub use crate::parser::{Parse, Parser};
 pub use crate::syntax_kind_codegen::SyntaxKind;
 pub use crate::syntax_node::{SyntaxElement, SyntaxNode, SyntaxToken};
diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs
index 80219f8d..2f311ba5 100644
--- a/crates/parser/src/parser.rs
+++ b/crates/parser/src/parser.rs
@@ -1,6 +1,5 @@
 use cstree::syntax::ResolvedNode;
 use cstree::{build::GreenNodeBuilder, text::TextRange};
-use log::debug;
 use pg_query::NodeEnum;
 
 use crate::ast_node::RawStmt;
@@ -8,7 +7,7 @@ use crate::syntax_error::SyntaxError;
 use crate::syntax_kind_codegen::SyntaxKind;
 use crate::syntax_node::SyntaxNode;
 
-/// Main parser that controls the cst building process, and collects errors and statements
+/// Main parser that exposes the `cstree` api, and collects errors and statements
 #[derive(Debug)]
 pub struct Parser {
     /// The cst builder
@@ -17,16 +16,9 @@ pub struct Parser {
     errors: Vec<SyntaxError>,
     /// The pg_query statements representing the abtract syntax tree
     stmts: Vec<RawStmt>,
-    /// The current checkpoint depth, if any
-    checkpoint: Option<i32>,
-    /// Whether the parser is currently parsing a flat node
-    is_parsing_flat_node: bool,
-    /// Keeps track of currently open nodes
-    /// Latest opened is last
-    open_nodes: Vec<(SyntaxKind, i32)>,
 }
 
-/// Result of parsing
+/// Result of Building
 #[derive(Debug)]
 pub struct Parse {
     /// The concrete syntax tree
@@ -43,72 +35,16 @@ impl Parser {
             inner: GreenNodeBuilder::new(),
             errors: Vec::new(),
             stmts: Vec::new(),
-            checkpoint: None,
-            is_parsing_flat_node: false,
-            open_nodes: Vec::new(),
         }
     }
 
-    /// close all nodes until the specified depth is reached
-    pub fn close_until_depth(&mut self, depth: i32) {
-        debug!("close until depth {}", depth);
-        if self.open_nodes.is_empty() || self.get_current_depth() < depth {
-            return;
-        }
-        loop {
-            if self.open_nodes.is_empty() || self.get_current_depth() < depth {
-                break;
-            }
-            self.finish_node();
-        }
-    }
-
-    fn get_current_depth(&self) -> i32 {
-        self.open_nodes[self.open_nodes.len() - 1].1
-    }
-
-    /// set a checkpoint at current depth
-    ///
-    /// if `is_parsing_flat_node` is true, all tokens parsed until this checkpoint is closed will be applied immediately
-    pub fn set_checkpoint(&mut self) {
-        assert!(
-            self.checkpoint.is_none(),
-            "Must close previouos checkpoint before setting new one"
-        );
-        self.checkpoint = Some(self.get_current_depth());
-    }
-
-    /// close all nodes until checkpoint depth is reached
-    pub fn close_checkpoint(&mut self) {
-        if self.checkpoint.is_some() {
-            self.close_until_depth(self.checkpoint.unwrap());
-        }
-        self.checkpoint = None;
-        self.is_parsing_flat_node = false;
-    }
-
-    /// start a new node of `SyntaxKind` at `depth`
-    /// handles closing previous nodes if necessary
-    pub fn start_node_at(&mut self, kind: SyntaxKind, depth: i32) {
-        debug!("starting node at depth {} {:?}", depth, kind);
-        // close until target depth
-        self.close_until_depth(depth);
-
-        self.open_nodes.push((kind, depth));
-        debug!("start node {:?}", kind);
+    /// start a new node of `SyntaxKind`
+    pub fn start_node(&mut self, kind: SyntaxKind) {
         self.inner.start_node(kind);
     }
 
     /// finish current node
     pub fn finish_node(&mut self) {
-        debug!("finish_node");
-
-        let n = self.open_nodes.pop();
-        if n.is_none() {
-            panic!("No node to finish");
-        }
-
-        debug!("finish node {:?}", n.unwrap().0);
         self.inner.finish_node();
     }
 
diff --git a/crates/parser/src/resolve_tokens.rs b/crates/parser/src/resolve_tokens.rs
index 93a8a877..9c01b471 100644
--- a/crates/parser/src/resolve_tokens.rs
+++ b/crates/parser/src/resolve_tokens.rs
@@ -1,88 +1,96 @@
-use crate::get_children_codegen::ChildrenNode;
+use std::{
+    cmp::{max, min},
+    convert::identity,
+};
+
 use crate::get_location_codegen::get_location;
+use crate::get_nodes_codegen::Node;
 use cstree::text::{TextRange, TextSize};
 use pg_query::{protobuf::ScanToken, NodeEnum};
 
 #[derive(Debug, Clone)]
-pub struct NestedNode {
-    pub id: usize,
-    pub inner: ChildrenNode,
-    // .start property of `ScanToken`
-    pub tokens: Vec<i32>,
-    pub range: TextRange,
+pub struct RangedNode {
+    pub inner: Node,
+    pub estimated_range: TextRange,
 }
 
-/// Turns a `Vec<ChildrenNode>` into a `Vec<NestedNode>` by adding `tokens` and `range` to each node.
-///
-/// For each node, we walk all properties and search for tokens that match the property value. The
-/// token that is closest to the node or a parent is used.
-///
-/// The node range is the minimum start and maximum end of all tokens.
-pub fn resolve_tokens(
-    children: &Vec<ChildrenNode>,
-    tokens: &Vec<ScanToken>,
-    text: &str,
-) -> Vec<NestedNode> {
-    children
-        .iter()
-        .enumerate()
-        .map(|(idx, c)| {
-            let nearest_parent_location = get_nearest_parent_location(&c, children);
-            let furthest_child_location = get_furthest_child_location(&c, children);
-
-            let mut child_tokens = Vec::new();
-
-            let mut find_token = |property: String| {
-                child_tokens.push(
-                    tokens
-                        .iter()
-                        .filter_map(|t| {
-                            if get_token_text(
-                                usize::try_from(t.start).unwrap(),
-                                usize::try_from(t.end).unwrap(),
-                                text,
-                            ) != property
-                            {
-                                return None;
-                            }
-
-                            if furthest_child_location.is_some()
-                                && furthest_child_location.unwrap() < t.start as i32
-                            {
-                                return None;
-                            }
-
-                            let distance = t.start - nearest_parent_location;
-                            if distance > 0 {
-                                Some((distance, t))
-                            } else {
-                                None
-                            }
-                        })
-                        .min_by_key(|(d, _)| d.to_owned())
-                        .map(|(_, t)| t)
-                        .unwrap(),
-                );
-            };
-
-            match &c.node {
-                NodeEnum::RangeVar(n) => {
-                    find_token(n.relname.to_owned());
-                }
-                _ => {}
-            };
-
-            NestedNode {
-                id: idx,
-                inner: c.to_owned(),
-                tokens: child_tokens.iter().map(|t| t.start).collect(),
-                range: TextRange::new(
-                    TextSize::from(
-                        child_tokens.iter().min_by_key(|t| t.start).unwrap().start as u32,
-                    ),
-                    TextSize::from(child_tokens.iter().max_by_key(|t| t.end).unwrap().end as u32),
-                ),
+/// Turns a `Vec<Node>` into a `Vec<RangedNode>` by estimating their range.
+pub fn resolve_tokens(nodes: &Vec<Node>, tokens: &Vec<ScanToken>, text: &str) -> Vec<RangedNode> {
+    let mut ranged_nodes: Vec<RangedNode> = Vec::new();
+
+    // we get an estimated range by searching for tokens that match the node property values
+    // and, if available, the `location` of the node itself
+    nodes.iter().for_each(|n| {
+        let nearest_parent_location = get_nearest_parent_location(&n, nodes);
+        let furthest_child_location = get_furthest_child_location(&n, nodes);
+
+        let mut child_tokens = Vec::new();
+
+        let mut find_token = |property: String| {
+            child_tokens.push(
+                tokens
+                    .iter()
+                    .filter_map(|t| {
+                        if get_token_text(
+                            usize::try_from(t.start).unwrap(),
+                            usize::try_from(t.end).unwrap(),
+                            text,
+                        ) != property
+                        {
+                            return None;
+                        }
+
+                        if furthest_child_location.is_some()
+                            && furthest_child_location.unwrap() < t.start as i32
+                        {
+                            return None;
+                        }
+
+                        let distance = t.start - nearest_parent_location;
+                        if distance > 0 {
+                            Some((distance, t))
+                        } else {
+                            None
+                        }
+                    })
+                    .min_by_key(|(d, _)| d.to_owned())
+                    .map(|(_, t)| t)
+                    .unwrap(),
+            );
+        };
+
+        match &n.node {
+            NodeEnum::RangeVar(n) => {
+                find_token(n.relname.to_owned());
             }
+            _ => {}
+        };
+
+        let from_locations: Vec<i32> = [
+            get_location(&n.node),
+            Some(nearest_parent_location),
+            Some(child_tokens.iter().min_by_key(|t| t.start).unwrap().start),
+        ]
+        .into_iter()
+        .filter_map(|x| x)
+        .collect();
+
+        ranged_nodes.push(RangedNode {
+            inner: n.to_owned(),
+            estimated_range: TextRange::new(
+                TextSize::from(from_locations.iter().min().unwrap_or(&0).to_owned() as u32),
+                TextSize::from(child_tokens.iter().max_by_key(|t| t.end).unwrap().end as u32),
+            ),
+        });
+    });
+
+    // FIXME: this additional loop is not required if we order the nodes by path first
+    ranged_nodes
+        .iter()
+        .map(|n| RangedNode {
+            inner: n.inner.to_owned(),
+            // the range of a node must be larger than the range of all children nodes
+            estimated_range: get_largest_child_range(&n, &ranged_nodes),
         })
         .collect()
 }
@@ -94,7 +102,26 @@ fn get_token_text(start: usize, end: usize, text: &str) -> String {
         .collect::<String>()
 }
 
-fn get_furthest_child_location(c: &ChildrenNode, children: &Vec<ChildrenNode>) -> Option<i32> {
+fn get_largest_child_range(node: &RangedNode, nodes: &Vec<RangedNode>) -> TextRange {
+    let mut start: TextSize = node.estimated_range.start().to_owned();
+    let mut end: TextSize = node.estimated_range.end().to_owned();
+
+    nodes.iter().for_each(|n| {
+        if !n.inner.path.starts_with(node.inner.path.as_str()) {
+            return;
+        }
+        if start < n.estimated_range.start() {
+            start = n.estimated_range.start();
+        }
+        if end > n.estimated_range.end() {
+            end = n.estimated_range.end();
+        }
+    });
+
+    TextRange::new(start, end)
+}
+
+fn get_furthest_child_location(c: &Node, children: &Vec<Node>) -> Option<i32> {
     children
         .iter()
         .filter_map(|n| {
@@ -106,7 +133,7 @@ fn get_furthest_child_location(c: &ChildrenNode, children: &Vec<ChildrenNode>) -
         .max()
 }
 
-fn get_nearest_parent_location(n: &ChildrenNode, children: &Vec<ChildrenNode>) -> i32 {
+fn get_nearest_parent_location(n: &Node, children: &Vec<Node>) -> i32 {
     // if location is set, return it
     let location = get_location(&n.node);
     if location.is_some() {
diff --git a/crates/parser/src/sibling_token.rs b/crates/parser/src/sibling_token.rs
deleted file mode 100644
index 6a42dd0d..00000000
--- a/crates/parser/src/sibling_token.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-use crate::syntax_kind_codegen::SyntaxKind;
-
-impl SyntaxKind {
-    pub fn is_opening_sibling(&self) -> bool {
-        match self {
-            SyntaxKind::Ascii40 => true,
-            SyntaxKind::Ascii91 => true,
-            SyntaxKind::Case => true,
-            _ => false,
-        }
-    }
-    pub fn is_closing_sibling(&self) -> bool {
-        match self {
-            SyntaxKind::Ascii41 => true,
-            SyntaxKind::Ascii93 => true,
-            SyntaxKind::EndP => true,
-            _ => false,
-        }
-    }
-    pub fn sibling(&self) -> Option<SyntaxKind> {
-        match self {
-            SyntaxKind::Case => Some(SyntaxKind::EndP),
-            SyntaxKind::EndP => Some(SyntaxKind::Case),
-            SyntaxKind::Ascii40 => Some(SyntaxKind::Ascii41),
-            SyntaxKind::Ascii41 => Some(SyntaxKind::Ascii40),
-            SyntaxKind::Ascii91 => Some(SyntaxKind::Ascii93),
-            SyntaxKind::Ascii93 => Some(SyntaxKind::Ascii91),
-            _ => None,
-        }
-    }
-}
diff --git a/crates/parser/src/source_parser.rs b/crates/parser/src/source_parser.rs
index b7a727ec..341d6eb9 100644
--- a/crates/parser/src/source_parser.rs
+++ b/crates/parser/src/source_parser.rs
@@ -75,14 +75,15 @@ fn tokens(input: &str) -> Vec<Token> {
 }
 
 impl Parser {
-    /// Parse a source
-    pub fn parse_source_at(&mut self, text: &str, at_offset: Option<u32>) {
+    fn parse_source_at(&mut self, text: &str, at_offset: Option<u32>) {
         let offset = at_offset.unwrap_or(0);
 
         let tokens = tokens(&text);
         let mut tokens_iter = tokens.iter();
 
-        self.start_node_at(SyntaxKind::SourceFile, 0);
+        // open root `SourceFile` node
+        self.start_node(SyntaxKind::SourceFile);
+
         while let Some(token) = tokens_iter.next() {
             match token.kind {
                 SourceFileToken::Comment => {
@@ -92,13 +93,15 @@ impl Parser {
                     self.token(SyntaxKind::Newline, token.text.as_str());
                 }
                 SourceFileToken::Statement => {
-                    self.parse_statement(
-                        token.text.as_str(),
-                        Some(offset + u32::from(token.span.start())),
-                    );
+                    // self.parse_statement(
+                    //     token.text.as_str(),
+                    //     Some(offset + u32::from(token.span.start())),
+                    // );
                 }
             };
         }
+
+        // close root `SourceFile` node
         self.finish_node();
     }
 }
diff --git a/crates/parser/src/statement_parser.rs b/crates/parser/src/statement_parser.rs
index fde95be8..ee461391 100644
--- a/crates/parser/src/statement_parser.rs
+++ b/crates/parser/src/statement_parser.rs
@@ -1,8 +1,10 @@
+use std::collections::VecDeque;
+
 use cstree::text::{TextRange, TextSize};
-use logos::{Logos, Span};
+use logos::Logos;
 
 use crate::{
-    get_children_codegen::get_children, parser::Parser, resolve_tokens::resolve_tokens,
+    get_nodes_codegen::get_nodes, parser::Parser, resolve_tokens::resolve_tokens,
     syntax_kind_codegen::SyntaxKind,
 };
 
@@ -25,7 +27,6 @@ pub enum StatementToken {
 
 impl StatementToken {
     /// Creates a `SyntaxKind` from a `StatementToken`.
-    /// can be generated.
     pub fn syntax_kind(&self) -> SyntaxKind {
         match self {
             StatementToken::Whitespace => SyntaxKind::Whitespace,
@@ -36,30 +37,48 @@ impl StatementToken {
     }
 }
 
+struct TokenBuffer {
+    tokens: VecDeque<(SyntaxKind, String)>,
+}
+
+impl TokenBuffer {
+    fn new() -> Self {
+        Self {
+            tokens: VecDeque::new(),
+        }
+    }
+
+    fn push(&mut self, kind: SyntaxKind, text: String) {
+        self.tokens.push_back((kind, text));
+    }
+
+    fn drain(&mut self, until: Option<u32>) -> Vec<(SyntaxKind, String)> {
+        if self.tokens.is_empty() {
+            return Vec::new();
+        }
+        let range = match until {
+            Some(u) => 0..u as usize,
+            None => 0..self.tokens.len(),
+        };
+        self.tokens.drain(range).collect::<Vec<_>>()
+    }
+}
+
 impl Parser {
-    /// The main entry point for parsing a statement `text`. `at_offset` is the offset of the statement in the source file.
-    ///
-    /// On a high level, the algorithm works as follows:
-    /// 1. Parse the statement with pg_query.rs. If the statement contains syntax errors, the parser will report the error and continue to work without information
-    ///   about the nodes. The result will be a flat list of tokens under the generic `Stmt` node.
-    ///   If successful, the first node in the ordered list will be the main node of the statement,
-    ///   and serves as a root node.
-    /// 2. Scan the statements for tokens with pg_query.rs. This will never fail, even if the statement contains syntax errors.
-    /// 3. Parse the statement with the `StatementToken` lexer. The lexer only contains the tokens
-    ///    that are not parsed by pg_query.rs, such as whitespace.
-    /// 4. Define a pointer that starts at 0 and move it along the statement.
-    ///    - first, check if the current pointer is within a pg_query token. If so, consume the
-    ///    token.
-    ///    - if not, consume the next token from the `StatementToken` lexer.
-    /// 5. Close all open nodes for that statement.
-    pub fn parse_statement(&mut self, text: &str, at_offset: Option<u32>) {
+    pub fn parse_statement_at(&mut self, text: &str, at_offset: Option<u32>) {
+        // 1. Collect as much information as possible from pg_query.rs and `StatementToken` lexer
+
+        // offset of the statement in the source file.
         let offset = at_offset.unwrap_or(0);
+
+        // range of the statement in the source file.
         let range = TextRange::new(
             TextSize::from(offset),
             TextSize::from(offset + text.len() as u32),
         );
 
-        let mut pg_query_tokens = match pg_query::scan(text) {
+        // tokens from pg_query.rs
+        let pg_query_tokens = match pg_query::scan(text) {
             Ok(scanned) => scanned.tokens,
             Err(e) => {
                 self.error(e.to_string(), range);
@@ -67,8 +86,7 @@ impl Parser {
             }
         };
 
-        // Get root node with depth 1
-        // Since we are parsing only a single statement there can be only a single node at depth 1
+        // root node of the statement, if no syntax errors
         let pg_query_root = match pg_query::parse(text) {
             Ok(parsed) => Some(
                 parsed
@@ -86,9 +104,11 @@ impl Parser {
             }
         };
 
+        // ranged nodes from pg_query.rs, including the root node
+        // the nodes are ordered by starting range, starting with the root node
         let mut pg_query_nodes = match &pg_query_root {
             Some(root) => resolve_tokens(
-                &get_children(root, text.to_string(), 1),
+                &get_nodes(root, text.to_string(), 1),
                 &pg_query_tokens,
                 &text,
             )
@@ -99,109 +119,291 @@ impl Parser {
 
         let mut pg_query_tokens = pg_query_tokens.iter().peekable();
 
-        let mut lexer = StatementToken::lexer(&text);
+        let mut statement_token_lexer = StatementToken::lexer(&text);
+
+        // 2. Setup data structures required for the parsing algorithm
+        // A buffer for tokens that are not applied immediately to the cst
+        let mut token_buffer = TokenBuffer::new();
+        // Keeps track of currently open nodes. Latest opened is last.
+        let mut open_nodes: Vec<(SyntaxKind, TextRange, i32)> = Vec::new();
 
-        // parse root node if no syntax errors
-        if pg_query_root.is_some() {
-            let root_node = pg_query_root.unwrap();
-            self.stmt(root_node.to_owned(), range);
-            self.start_node_at(SyntaxKind::new_from_pg_query_node(&root_node), 1);
+        // 3. Parse the statement
+
+        // Handle root node
+        if pg_query_nodes.len() > 0 {
+            // if there are no syntax errors, use the pg_query node as the root node
+            let root_node = pg_query_nodes
+                .find(|n| n.inner.path == "0".to_string())
+                .unwrap();
+            // can only be at depth 1
+            assert_eq!(
+                root_node.inner.depth, 1,
+                "Root node must be at depth 1, but is at depth {}",
+                root_node.inner.depth
+            );
+            self.stmt(root_node.inner.node.to_owned(), range);
+            self.start_node(SyntaxKind::new_from_pg_query_node(&root_node.inner.node));
+            open_nodes.push((
+                SyntaxKind::new_from_pg_query_node(&root_node.inner.node),
+                range,
+                1,
+            ));
         } else {
             // fallback to generic node as root
-            self.start_node_at(SyntaxKind::Stmt, 1);
+            self.start_node(SyntaxKind::Stmt);
+            open_nodes.push((SyntaxKind::Stmt, range, 1));
         }
-        self.set_checkpoint();
 
         // start at 0, and increment by the length of the token
         let mut pointer: i32 = 0;
 
-        #[derive(Debug)]
-        struct Token {
-            syntax_kind: SyntaxKind,
-            span: Span,
-        }
-
+        // main loop that walks through the statement token by token
         while pointer < text.len() as i32 {
             // Check if the pointer is within a pg_query token
             let next_pg_query_token = pg_query_tokens.peek();
-            let token = if next_pg_query_token.is_some()
+
+            let token_length = if next_pg_query_token.is_some()
                 && next_pg_query_token.unwrap().start <= pointer
                 && pointer <= next_pg_query_token.unwrap().end
             {
                 let token = pg_query_tokens.next().unwrap();
-                Token {
-                    syntax_kind: SyntaxKind::new_from_pg_query_token(&token),
-                    span: Span {
-                        start: token.start as usize,
-                        end: token.end as usize,
-                    },
+
+                let token_text = text
+                    .chars()
+                    .skip(token.start as usize)
+                    .take((token.end as usize) - (token.start as usize))
+                    .collect::<String>();
+
+                // a node can only start and end with a pg_query token, so we can handle them here
+
+                // before applying the token, close any node that ends before the token starts
+                while open_nodes.last().is_some()
+                    && open_nodes.last().unwrap().1.end() <= TextSize::from(token.start as u32)
+                {
+                    self.finish_node();
+                    open_nodes.pop();
                 }
+
+                // drain token buffer
+                for (kind, text) in token_buffer.drain(None) {
+                    self.token(kind, text.as_str());
+                }
+
+                // apply the token
+                self.token(SyntaxKind::new_from_pg_query_token(token), text);
+
+                // consume all nodes that start at or before the token ends
+                while pg_query_nodes.peek().is_some()
+                    && pg_query_nodes.peek().unwrap().estimated_range.start()
+                        <= TextSize::from(token.end as u32)
+                {
+                    let node = pg_query_nodes.next().unwrap();
+                    self.start_node(SyntaxKind::new_from_pg_query_node(&node.inner.node));
+                    open_nodes.push((
+                        SyntaxKind::new_from_pg_query_node(&node.inner.node),
+                        node.estimated_range,
+                        node.inner.depth,
+                    ));
+                }
+
+                token_text.len() as i32
             } else {
                 // fallback to statement token
 
                 // move statement token lexer to before pointer
-                while (lexer.span().end as i32) < pointer {
-                    lexer.next();
+                while (statement_token_lexer.span().end as i32) < pointer {
+                    statement_token_lexer.next();
                 }
-                let token = lexer.next();
-                if token.is_none() || (lexer.span().start as i32) != pointer {
+                let token = statement_token_lexer.next();
+                if token.is_none() || (statement_token_lexer.span().start as i32) != pointer {
                     // if the token is not at the pointer, we have a syntax error
                     panic!(
                         "Expected token for '{}' at offset {}",
-                        lexer.slice(),
-                        lexer.span().start
+                        statement_token_lexer.slice(),
+                        statement_token_lexer.span().start
                     );
                 }
-                Token {
-                    syntax_kind: token.unwrap().unwrap().syntax_kind(),
-                    span: lexer.span(),
-                }
+                let token_text = statement_token_lexer.slice().to_string();
+                token_buffer.push(token.unwrap().unwrap().syntax_kind(), token_text.clone());
+                token_text.len() as i32
             };
 
-            self.token(
-                token.syntax_kind,
-                text.chars()
-                    .skip(token.span.start)
-                    .take(token.span.end - token.span.start)
-                    .collect::<String>()
-                    .as_str(),
-            );
-
-            pointer = pointer + (token.span.end - token.span.start) as i32;
+            pointer = pointer + token_length;
         }
 
-        // close up nodes
-        self.close_checkpoint();
+        while open_nodes.last().is_some() {
+            self.finish_node();
+            open_nodes.pop();
+        }
     }
 }
 
+// impl Parser {
+//     /// The main entry point for parsing a statement `text`. `at_offset` is the offset of the statement in the source file.
+//     ///
+//     /// On a high level, the algorithm works as follows:
+//     /// 1. Parse the statement with pg_query.rs. If the statement contains syntax errors, the parser will report the error and continue to work without information
+//     ///   about the nodes. The result will be a flat list of tokens under the generic `Stmt` node.
+//     ///   If successful, the first node in the ordered list will be the main node of the statement,
+//     ///   and serves as a root node.
+//     /// 2. Scan the statements for tokens with pg_query.rs. This will never fail, even if the statement contains syntax errors.
+//     /// 3. Parse the statement with the `StatementToken` lexer. The lexer only contains the tokens
+//     ///    that are not parsed by pg_query.rs, such as whitespace.
+//     /// 4. Define a pointer that starts at 0 and move it along the statement.
+//     ///    - first, check if the current pointer is within a pg_query token. If so, consume the
+//     ///    token.
+//     ///    - if not, consume the next token from the `StatementToken` lexer.
+//     /// 5. Close all open nodes for that statement.
+//     pub fn parse_statement(&mut self, text: &str, at_offset: Option<u32>) {
+//         let offset = at_offset.unwrap_or(0);
+//         let range = TextRange::new(
+//             TextSize::from(offset),
+//             TextSize::from(offset + text.len() as u32),
+//         );
+//
+//         let mut pg_query_tokens = match pg_query::scan(text) {
+//             Ok(scanned) => scanned.tokens,
+//             Err(e) => {
+//                 self.error(e.to_string(), range);
+//                 Vec::new()
+//             }
+//         };
+//
+//         // Get root node with depth 1
+//         // Since we are parsing only a single statement there can be only a single node at depth 1
+//         let pg_query_root = match pg_query::parse(text) {
+//             Ok(parsed) => Some(
+//                 parsed
+//                     .protobuf
+//                     .nodes()
+//                     .iter()
+//                     .find(|n| n.1 == 1)
+//                     .unwrap()
+//                     .0
+//                     .to_enum(),
+//             ),
+//             Err(e) => {
+//                 self.error(e.to_string(), range);
+//                 None
+//             }
+//         };
+//
+//         let mut pg_query_nodes = match &pg_query_root {
+//             Some(root) => resolve_tokens(
+//                 &get_nodes(root, text.to_string(), 1),
+//                 &pg_query_tokens,
+//                 &text,
+//             )
+//             .into_iter()
+//             .peekable(),
+//             None => Vec::new().into_iter().peekable(),
+//         };
+//
+//         let mut pg_query_tokens = pg_query_tokens.iter().peekable();
+//
+//         let mut lexer = StatementToken::lexer(&text);
+//
+//         // parse root node if no syntax errors
+//         if pg_query_root.is_some() {
+//             let root_node = pg_query_root.unwrap();
+//             self.stmt(root_node.to_owned(), range);
+//             self.start_node_at(SyntaxKind::new_from_pg_query_node(&root_node), 1);
+//         } else {
+//             // fallback to generic node as root
+//             self.start_node_at(SyntaxKind::Stmt, 1);
+//         }
+//         self.set_checkpoint();
+//
+//         // start at 0, and increment by the length of the token
+//         let mut pointer: i32 = 0;
+//
+//         #[derive(Debug)]
+//         struct Token {
+//             syntax_kind: SyntaxKind,
+//             span: Span,
+//         }
+//
+//         while pointer < text.len() as i32 {
+//             // Check if the pointer is within a pg_query token
+//             let next_pg_query_token = pg_query_tokens.peek();
+//             let token = if next_pg_query_token.is_some()
+//                 && next_pg_query_token.unwrap().start <= pointer
+//                 && pointer <= next_pg_query_token.unwrap().end
+//             {
+//                 let token = pg_query_tokens.next().unwrap();
+//                 Token {
+//                     syntax_kind: SyntaxKind::new_from_pg_query_token(&token),
+//                     span: Span {
+//                         start: token.start as usize,
+//                         end: token.end as usize,
+//                     },
+//                 }
+//             } else {
+//                 // fallback to statement token
+//
+//                 // move statement token lexer to before pointer
+//                 while (lexer.span().end as i32) < pointer {
+//                     lexer.next();
+//                 }
+//                 let token = lexer.next();
+//                 if token.is_none() || (lexer.span().start as i32) != pointer {
+//                     // if the token is not at the pointer, we have a syntax error
+//                     panic!(
+//                         "Expected token for '{}' at offset {}",
+//                         lexer.slice(),
+//                         lexer.span().start
+//                     );
+//                 }
+//                 Token {
+//                     syntax_kind: token.unwrap().unwrap().syntax_kind(),
+//                     span: lexer.span(),
+//                 }
+//             };
+//
+//             self.token(
+//                 token.syntax_kind,
+//                 text.chars()
+//                     .skip(token.span.start)
+//                     .take(token.span.end - token.span.start)
+//                     .collect::<String>()
+//                     .as_str(),
+//             );
+//
+//             pointer = pointer + (token.span.end - token.span.start) as i32;
+//         }
+//
+//         // close up nodes
+//         self.close_checkpoint();
+//     }
+// }
+
 #[cfg(test)]
 mod tests {
     use std::assert_eq;
 
     use super::*;
 
-    #[test]
-    fn test_invalid_statement() {
-        let input = "select select;";
-
-        let mut parser = Parser::new();
-        parser.parse_statement(input, None);
-        let parsed = parser.finish();
-
-        assert_eq!(parsed.cst.text(), input);
-    }
-
-    #[test]
-    fn test_create_sql_function() {
-        let input = "CREATE FUNCTION dup(in int, out f1 int, out f2 text)
-    AS $$ SELECT $1, CAST($1 AS text) || ' is text' $$
-    LANGUAGE SQL;";
-
-        let mut parser = Parser::new();
-        parser.parse_statement(input, None);
-        let parsed = parser.finish();
-
-        assert_eq!(parsed.cst.text(), input);
-    }
+    // #[test]
+    // fn test_invalid_statement() {
+    //     let input = "select select;";
+    //
+    //     let mut parser = Parser::new();
+    //     parser.parse_statement(input, None);
+    //     let parsed = parser.finish();
+    //
+    //     assert_eq!(parsed.cst.text(), input);
+    // }
+    //
+    // #[test]
+    // fn test_create_sql_function() {
+    //     let input = "CREATE FUNCTION dup(in int, out f1 int, out f2 text)
+    // AS $$ SELECT $1, CAST($1 AS text) || ' is text' $$
+    // LANGUAGE SQL;";
+    //
+    //     let mut parser = Parser::new();
+    //     parser.parse_statement(input, None);
+    //     let parsed = parser.finish();
+    //
+    //     assert_eq!(parsed.cst.text(), input);
+    // }
 }

From c84f852f5eeffb1c87248c86f52b7d022e3fd7a5 Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Sun, 1 Oct 2023 16:36:10 +0200
Subject: [PATCH 04/16] refactor: drop token buffer struct

---
 crates/parser/src/statement_parser.rs | 33 +++------------------------
 1 file changed, 3 insertions(+), 30 deletions(-)

diff --git a/crates/parser/src/statement_parser.rs b/crates/parser/src/statement_parser.rs
index ee461391..3687f11c 100644
--- a/crates/parser/src/statement_parser.rs
+++ b/crates/parser/src/statement_parser.rs
@@ -37,33 +37,6 @@ impl StatementToken {
     }
 }
 
-struct TokenBuffer {
-    tokens: VecDeque<(SyntaxKind, String)>,
-}
-
-impl TokenBuffer {
-    fn new() -> Self {
-        Self {
-            tokens: VecDeque::new(),
-        }
-    }
-
-    fn push(&mut self, kind: SyntaxKind, text: String) {
-        self.tokens.push_back((kind, text));
-    }
-
-    fn drain(&mut self, until: Option<u32>) -> Vec<(SyntaxKind, String)> {
-        if self.tokens.is_empty() {
-            return Vec::new();
-        }
-        let range = match until {
-            Some(u) => 0..u as usize,
-            None => 0..self.tokens.len(),
-        };
-        self.tokens.drain(range).collect::<Vec<_>>()
-    }
-}
-
 impl Parser {
     pub fn parse_statement_at(&mut self, text: &str, at_offset: Option<u32>) {
         // 1. Collect as much information as possible from pg_query.rs and `StatementToken` lexer
@@ -123,7 +96,7 @@ impl Parser {
 
         // 2. Setup data structures required for the parsing algorithm
         // A buffer for tokens that are not applied immediately to the cst
-        let mut token_buffer = TokenBuffer::new();
+        let mut token_buffer: VecDeque<(SyntaxKind, String)> = VecDeque::new();
         // Keeps track of currently open nodes. Latest opened is last.
         let mut open_nodes: Vec<(SyntaxKind, TextRange, i32)> = Vec::new();
 
@@ -185,7 +158,7 @@ impl Parser {
                 }
 
                 // drain token buffer
-                for (kind, text) in token_buffer.drain(None) {
+                for (kind, text) in token_buffer.drain(0..token_buffer.len()) {
                     self.token(kind, text.as_str());
                 }
 
@@ -224,7 +197,7 @@ impl Parser {
                     );
                 }
                 let token_text = statement_token_lexer.slice().to_string();
-                token_buffer.push(token.unwrap().unwrap().syntax_kind(), token_text.clone());
+                token_buffer.push_back((token.unwrap().unwrap().syntax_kind(), token_text.clone()));
                 token_text.len() as i32
             };
 

From cf5465a1cbe71a9effec76b72410b3d364631cbc Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Sun, 1 Oct 2023 19:47:47 +0200
Subject: [PATCH 05/16] feat: estimate_node_range poc

---
 crates/parser/src/estimate_node_range.rs     | 271 +++++++++++++++++++
 crates/parser/src/lib.rs                     |   3 +-
 crates/parser/src/resolve_tokens.rs          | 161 -----------
 crates/parser/src/statement_parser.rs        |  14 +-
 crates/parser/tests/statement_parser_test.rs |   2 +-
 5 files changed, 282 insertions(+), 169 deletions(-)
 create mode 100644 crates/parser/src/estimate_node_range.rs
 delete mode 100644 crates/parser/src/resolve_tokens.rs

diff --git a/crates/parser/src/estimate_node_range.rs b/crates/parser/src/estimate_node_range.rs
new file mode 100644
index 00000000..390212e8
--- /dev/null
+++ b/crates/parser/src/estimate_node_range.rs
@@ -0,0 +1,271 @@
+use std::cmp::{max, min};
+
+use crate::get_location_codegen::get_location;
+use crate::get_nodes_codegen::Node;
+use cstree::text::{TextRange, TextSize};
+use pg_query::{protobuf::ScanToken, NodeEnum};
+
+#[derive(Debug, Clone)]
+pub struct RangedNode {
+    pub inner: Node,
+    pub range: TextRange,
+}
+
+/// Turns a `Vec<Node>` into a `Vec<RangedNode>` by estimating their range.
+pub fn estimate_node_range(
+    nodes: &mut Vec<Node>,
+    tokens: &Vec<ScanToken>,
+    text: &str,
+) -> Vec<RangedNode> {
+    let mut ranged_nodes: Vec<RangedNode> = Vec::new();
+
+    // ensure that all children of any given node are already processed before processing the node itself
+    nodes.sort_by(|a, b| b.path.cmp(&a.path));
+
+    // we get an estimated range by searching for tokens that match the node property values
+    // and, if available, the `location` of the node itself
+    nodes.iter().for_each(|n| {
+        // first, get the estimated boundaries of the node based on the `location` property of a node
+        let nearest_parent_location = get_nearest_parent_location(&n, nodes);
+        let furthest_child_location = get_furthest_child_location(&n, nodes);
+
+        let mut child_tokens = Vec::new();
+
+        let mut find_token = |property: String| {
+            println!("find_token {}", property);
+            child_tokens.push(
+                tokens
+                    .iter()
+                    .filter_map(|t| {
+                        println!("token {:#?}", t);
+                        // make a string comparison of the text of the token and the property value
+                        if get_token_text(
+                            usize::try_from(t.start).unwrap(),
+                            usize::try_from(t.end).unwrap(),
+                            text,
+                        )
+                        .to_lowercase()
+                            != property.to_lowercase()
+                        {
+                            println!("token text does not match property value");
+                            return None;
+                        }
+
+                        // if the furthest child location is set, and it is smaller than the start of the token,
+                        // we can safely ignore this token, because it is not a child of the node
+                        if furthest_child_location.is_some()
+                            && furthest_child_location.unwrap() < t.start as i32
+                        {
+                            println!("furthest child location is smaller than token start");
+                            return None;
+                        }
+
+                        // if the token is before the nearest parent location, we can safely ignore it
+                        // if not, we calculate the distance to the nearest parent location
+                        let distance = t.start - nearest_parent_location;
+                        if distance >= 0 {
+                            println!("distance {} for token {:#?}", distance, t);
+                            Some((distance, t))
+                        } else {
+                            println!("distance is smaller than 0 for token {:#?}", t);
+                            None
+                        }
+                    })
+                    // and use the token with the smallest distance to the nearest parent location
+                    .min_by_key(|(d, _)| d.to_owned())
+                    .map(|(_, t)| t)
+                    .unwrap(),
+            );
+        };
+
+        match &n.node {
+            NodeEnum::RangeVar(n) => {
+                find_token(n.relname.to_owned());
+            }
+            NodeEnum::Integer(n) => {
+                find_token(n.ival.to_owned().to_string());
+            }
+            NodeEnum::AConst(n) => {
+                if n.isnull {
+                    find_token("null".to_string());
+                }
+            }
+            NodeEnum::ResTarget(n) => {
+                if n.name.len() > 0 {
+                    find_token(n.name.to_owned());
+                }
+            }
+            NodeEnum::SelectStmt(n) => {
+                find_token("select".to_string());
+            }
+            _ => panic!("Node type not implemented: {:?}", n.node),
+        };
+
+        // For `from`, the location of the node itself is always correct when it is available.
+        // If not available, the closest estimation is the smaller value of the start of the first direct child token,
+        // and the start of all children ranges. If neither is available, let’s panic for now.
+        // The parent location as a fallback should never be required, because any node must have either children with tokens, or a token itself.
+        let location = get_location(&n.node);
+        let from = if location.is_some() {
+            location.unwrap()
+        } else {
+            let start_of_first_child_token = if child_tokens.len() > 0 {
+                Some(child_tokens.iter().min_by_key(|t| t.start).unwrap().start)
+            } else {
+                None
+            };
+            let start_of_all_children_ranges = if ranged_nodes.len() > 0 {
+                Some(
+                    ranged_nodes
+                        .iter()
+                        .filter(|x| x.inner.path.starts_with(n.path.as_str()))
+                        .min_by_key(|n| n.range.start())
+                        .unwrap()
+                        .range
+                        .start(),
+                )
+            } else {
+                None
+            };
+
+            if start_of_first_child_token.is_some() {
+                if start_of_all_children_ranges.is_some() {
+                    min(
+                        start_of_first_child_token.unwrap(),
+                        u32::from(start_of_all_children_ranges.unwrap()) as i32,
+                    )
+                } else {
+                    start_of_first_child_token.unwrap()
+                }
+            } else if start_of_all_children_ranges.is_some() {
+                u32::from(start_of_all_children_ranges.unwrap()) as i32
+            } else {
+                panic!("No location or child tokens found for node {:?}", n);
+            }
+        };
+
+        // For `to`, it’s the larger value of the end of the last direct child token, and the end of all children ranges.
+        println!("{}: {:?}", n.path, n.node);
+        let end_of_last_child_token = if child_tokens.len() > 0 {
+            Some(child_tokens.iter().max_by_key(|t| t.end).unwrap().end)
+        } else {
+            None
+        };
+        let end_of_all_children_ranges = if ranged_nodes.len() > 0 {
+            Some(
+                ranged_nodes
+                    .iter()
+                    .filter(|x| x.inner.path.starts_with(n.path.as_str()))
+                    .max_by_key(|n| n.range.end())
+                    .unwrap()
+                    .range
+                    .end(),
+            )
+        } else {
+            None
+        };
+        let to = if end_of_last_child_token.is_some() {
+            if end_of_all_children_ranges.is_some() {
+                max(
+                    end_of_last_child_token.unwrap(),
+                    u32::from(end_of_all_children_ranges.unwrap()) as i32,
+                )
+            } else {
+                end_of_last_child_token.unwrap()
+            }
+        } else if end_of_all_children_ranges.is_some() {
+            u32::from(end_of_all_children_ranges.unwrap()) as i32
+        } else {
+            panic!("No child tokens or children ranges found for node {:?}", n);
+        };
+
+        ranged_nodes.push(RangedNode {
+            inner: n.to_owned(),
+            range: TextRange::new(TextSize::from(from as u32), TextSize::from(to as u32)),
+        });
+    });
+
+    ranged_nodes
+}
+
+fn get_token_text(start: usize, end: usize, text: &str) -> String {
+    text.chars()
+        .skip(start)
+        .take(end - start)
+        .collect::<String>()
+}
+
+fn get_furthest_child_location(c: &Node, children: &Vec<Node>) -> Option<i32> {
+    children
+        .iter()
+        .filter_map(|n| {
+            if !n.path.starts_with(c.path.as_str()) {
+                return None;
+            }
+            get_location(&n.node)
+        })
+        .max()
+}
+
+fn get_nearest_parent_location(n: &Node, children: &Vec<Node>) -> i32 {
+    // if location is set, return it
+    let location = get_location(&n.node);
+    if location.is_some() {
+        return location.unwrap();
+    }
+
+    // go up in the tree and check if location exists on any parent
+    let mut path_elements = n.path.split(".").collect::<Vec<&str>>();
+    path_elements.pop();
+    while path_elements.len() > 0 {
+        let parent_path = path_elements.join(".");
+        let node = children.iter().find(|c| c.path == parent_path);
+        if node.is_some() {
+            let location = get_location(&node.unwrap().node);
+            if location.is_some() {
+                return location.unwrap();
+            }
+        }
+
+        path_elements.pop();
+    }
+
+    // fallback to 0
+    return 0;
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::estimate_node_range::estimate_node_range;
+    use crate::get_nodes_codegen::get_nodes;
+
+    #[test]
+    fn test_estimate_node_range() {
+        let input = "select null";
+
+        let pg_query_tokens = match pg_query::scan(input) {
+            Ok(scanned) => scanned.tokens,
+            Err(_) => Vec::new(),
+        };
+
+        let pg_query_root = match pg_query::parse(input) {
+            Ok(parsed) => Some(
+                parsed
+                    .protobuf
+                    .nodes()
+                    .iter()
+                    .find(|n| n.1 == 1)
+                    .unwrap()
+                    .0
+                    .to_enum(),
+            ),
+            Err(_) => None,
+        };
+
+        let mut nodes = get_nodes(&pg_query_root.unwrap(), input.to_string(), 1);
+
+        let ranged_nodes = estimate_node_range(&mut nodes, &pg_query_tokens, &input);
+
+        dbg!(&ranged_nodes);
+    }
+}
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index 5344ff27..3bc10622 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -16,15 +16,16 @@
 //! To see how these drawbacks are mitigated, see the `statement.rs` and the `source_file.rs` module.
 
 mod ast_node;
+mod estimate_node_range;
 mod get_location_codegen;
 mod get_nodes_codegen;
 mod parser;
-mod resolve_tokens;
 mod source_parser;
 mod statement_parser;
 mod syntax_error;
 mod syntax_kind_codegen;
 mod syntax_node;
 
+pub use crate::parser::{Parse, Parser};
 pub use crate::syntax_kind_codegen::SyntaxKind;
 pub use crate::syntax_node::{SyntaxElement, SyntaxNode, SyntaxToken};
diff --git a/crates/parser/src/resolve_tokens.rs b/crates/parser/src/resolve_tokens.rs
deleted file mode 100644
index 9c01b471..00000000
--- a/crates/parser/src/resolve_tokens.rs
+++ /dev/null
@@ -1,161 +0,0 @@
-use std::{
-    cmp::{max, min},
-    convert::identity,
-};
-
-use crate::get_location_codegen::get_location;
-use crate::get_nodes_codegen::Node;
-use cstree::text::{TextRange, TextSize};
-use pg_query::{protobuf::ScanToken, NodeEnum};
-
-#[derive(Debug, Clone)]
-pub struct RangedNode {
-    pub inner: Node,
-    pub estimated_range: TextRange,
-}
-
-/// Turns a `Vec<Node>` into a `Vec<RangedNode>` by estimating their range.
-pub fn resolve_tokens(nodes: &Vec<Node>, tokens: &Vec<ScanToken>, text: &str) -> Vec<RangedNode> {
-    let mut ranged_nodes: Vec<RangedNode> = Vec::new();
-
-    // we get an estimated range by searching for tokens that match the node property values
-    // and, if available, the `location` of the node itself
-    nodes.iter().for_each(|n| {
-        let nearest_parent_location = get_nearest_parent_location(&n, nodes);
-        let furthest_child_location = get_furthest_child_location(&n, nodes);
-
-        let mut child_tokens = Vec::new();
-
-        let mut find_token = |property: String| {
-            child_tokens.push(
-                tokens
-                    .iter()
-                    .filter_map(|t| {
-                        if get_token_text(
-                            usize::try_from(t.start).unwrap(),
-                            usize::try_from(t.end).unwrap(),
-                            text,
-                        ) != property
-                        {
-                            return None;
-                        }
-
-                        if furthest_child_location.is_some()
-                            && furthest_child_location.unwrap() < t.start as i32
-                        {
-                            return None;
-                        }
-
-                        let distance = t.start - nearest_parent_location;
-                        if distance > 0 {
-                            Some((distance, t))
-                        } else {
-                            None
-                        }
-                    })
-                    .min_by_key(|(d, _)| d.to_owned())
-                    .map(|(_, t)| t)
-                    .unwrap(),
-            );
-        };
-
-        match &n.node {
-            NodeEnum::RangeVar(n) => {
-                find_token(n.relname.to_owned());
-            }
-            _ => {}
-        };
-
-        let from_locations: Vec<i32> = [
-            get_location(&n.node),
-            Some(nearest_parent_location),
-            Some(child_tokens.iter().min_by_key(|t| t.start).unwrap().start),
-        ]
-        .into_iter()
-        .filter_map(|x| x)
-        .collect();
-
-        ranged_nodes.push(RangedNode {
-            inner: n.to_owned(),
-            estimated_range: TextRange::new(
-                TextSize::from(from_locations.iter().min().unwrap_or(&0).to_owned() as u32),
-                TextSize::from(child_tokens.iter().max_by_key(|t| t.end).unwrap().end as u32),
-            ),
-        });
-    });
-
-    // FIXME: this additional loop is not required if we order the nodes by path first
-    ranged_nodes
-        .iter()
-        .map(|n| RangedNode {
-            inner: n.inner.to_owned(),
-            // the range of a node must be larger than the range of all children nodes
-            estimated_range: get_largest_child_range(&n, &ranged_nodes),
-        })
-        .collect()
-}
-
-fn get_token_text(start: usize, end: usize, text: &str) -> String {
-    text.chars()
-        .skip(start)
-        .take(end - start)
-        .collect::<String>()
-}
-
-fn get_largest_child_range(node: &RangedNode, nodes: &Vec<RangedNode>) -> TextRange {
-    let mut start: TextSize = node.estimated_range.start().to_owned();
-    let mut end: TextSize = node.estimated_range.end().to_owned();
-
-    nodes.iter().for_each(|n| {
-        if !n.inner.path.starts_with(node.inner.path.as_str()) {
-            return;
-        }
-        if start < n.estimated_range.start() {
-            start = n.estimated_range.start();
-        }
-        if end > n.estimated_range.end() {
-            end = n.estimated_range.end();
-        }
-    });
-
-    TextRange::new(start, end)
-}
-
-fn get_furthest_child_location(c: &Node, children: &Vec<Node>) -> Option<i32> {
-    children
-        .iter()
-        .filter_map(|n| {
-            if !n.path.starts_with(c.path.as_str()) {
-                return None;
-            }
-            get_location(&n.node)
-        })
-        .max()
-}
-
-fn get_nearest_parent_location(n: &Node, children: &Vec<Node>) -> i32 {
-    // if location is set, return it
-    let location = get_location(&n.node);
-    if location.is_some() {
-        return location.unwrap();
-    }
-
-    // go up in the tree and check if location exists on any parent
-    let mut path_elements = n.path.split(".").collect::<Vec<&str>>();
-    path_elements.pop();
-    while path_elements.len() > 0 {
-        let parent_path = path_elements.join(".");
-        let node = children.iter().find(|c| c.path == parent_path);
-        if node.is_some() {
-            let location = get_location(&node.unwrap().node);
-            if location.is_some() {
-                return location.unwrap();
-            }
-        }
-
-        path_elements.pop();
-    }
-
-    // fallback to 0
-    return 0;
-}
diff --git a/crates/parser/src/statement_parser.rs b/crates/parser/src/statement_parser.rs
index 3687f11c..e1888998 100644
--- a/crates/parser/src/statement_parser.rs
+++ b/crates/parser/src/statement_parser.rs
@@ -1,10 +1,12 @@
-use std::collections::VecDeque;
+use std::{collections::VecDeque, iter::Peekable};
 
 use cstree::text::{TextRange, TextSize};
 use logos::Logos;
 
 use crate::{
-    get_nodes_codegen::get_nodes, parser::Parser, resolve_tokens::resolve_tokens,
+    estimate_node_range::{estimate_node_range, RangedNode},
+    get_nodes_codegen::get_nodes,
+    parser::Parser,
     syntax_kind_codegen::SyntaxKind,
 };
 
@@ -80,8 +82,8 @@ impl Parser {
         // ranged nodes from pg_query.rs, including the root node
         // the nodes are ordered by starting range, starting with the root node
         let mut pg_query_nodes = match &pg_query_root {
-            Some(root) => resolve_tokens(
-                &get_nodes(root, text.to_string(), 1),
+            Some(root) => estimate_node_range(
+                &mut get_nodes(root, text.to_string(), 1),
                 &pg_query_tokens,
                 &text,
             )
@@ -167,14 +169,14 @@ impl Parser {
 
                 // consume all nodes that start at or before the token ends
                 while pg_query_nodes.peek().is_some()
-                    && pg_query_nodes.peek().unwrap().estimated_range.start()
+                    && pg_query_nodes.peek().unwrap().range.start()
                         <= TextSize::from(token.end as u32)
                 {
                     let node = pg_query_nodes.next().unwrap();
                     self.start_node(SyntaxKind::new_from_pg_query_node(&node.inner.node));
                     open_nodes.push((
                         SyntaxKind::new_from_pg_query_node(&node.inner.node),
-                        node.estimated_range,
+                        node.range,
                         node.inner.depth,
                     ));
                 }
diff --git a/crates/parser/tests/statement_parser_test.rs b/crates/parser/tests/statement_parser_test.rs
index 4eef7303..77542df1 100644
--- a/crates/parser/tests/statement_parser_test.rs
+++ b/crates/parser/tests/statement_parser_test.rs
@@ -23,7 +23,7 @@ fn valid_statements() {
         let contents = fs::read_to_string(&path).unwrap();
 
         let mut parser = Parser::new();
-        parser.parse_statement(&contents, None);
+        parser.parse_statement_at(&contents, None);
         let parsed = parser.finish();
 
         let mut settings = insta::Settings::clone_current();

From 88de8438b09044d24834fdf189d1c639c5d42025 Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Tue, 3 Oct 2023 17:22:53 +0200
Subject: [PATCH 06/16] minor improvements to estimate_node_range poc

---
 crates/parser/src/estimate_node_range.rs | 164 ++++++++++++++++-------
 1 file changed, 117 insertions(+), 47 deletions(-)

diff --git a/crates/parser/src/estimate_node_range.rs b/crates/parser/src/estimate_node_range.rs
index 390212e8..addc6408 100644
--- a/crates/parser/src/estimate_node_range.rs
+++ b/crates/parser/src/estimate_node_range.rs
@@ -3,7 +3,7 @@ use std::cmp::{max, min};
 use crate::get_location_codegen::get_location;
 use crate::get_nodes_codegen::Node;
 use cstree::text::{TextRange, TextSize};
-use pg_query::{protobuf::ScanToken, NodeEnum};
+use pg_query::{protobuf::ScanToken, protobuf::Token, NodeEnum};
 
 #[derive(Debug, Clone)]
 pub struct RangedNode {
@@ -31,72 +31,109 @@ pub fn estimate_node_range(
 
         let mut child_tokens = Vec::new();
 
-        let mut find_token = |property: String| {
-            println!("find_token {}", property);
-            child_tokens.push(
-                tokens
-                    .iter()
-                    .filter_map(|t| {
-                        println!("token {:#?}", t);
-                        // make a string comparison of the text of the token and the property value
-                        if get_token_text(
+        #[derive(Debug)]
+        struct TokenProperty {
+            value: Option<String>,
+            token: Option<Token>,
+        }
+
+        impl TokenProperty {
+            fn from_int(value: &i32) -> TokenProperty {
+                TokenProperty {
+                    value: Some(value.to_string()),
+                    token: None,
+                }
+            }
+
+            fn from_string(value: &String) -> TokenProperty {
+                assert!(value.len() > 0, "String property value has length 0");
+                TokenProperty {
+                    value: Some(value.to_owned()),
+                    token: None,
+                }
+            }
+
+            fn from_token(token: Token) -> TokenProperty {
+                TokenProperty {
+                    value: None,
+                    token: Some(token),
+                }
+            }
+        }
+
+        let mut get_token = |property: TokenProperty| {
+            let token = tokens
+                .iter()
+                .filter_map(|t| {
+                    if property.token.is_some() {
+                        // if a token is set, we can safely ignore all tokens that are not of the same type
+                        if t.token() != property.token.unwrap() {
+                            return None;
+                        }
+                    }
+                    // make a string comparison of the text of the token and the property value
+                    if property.value.is_some()
+                        && get_token_text(
                             usize::try_from(t.start).unwrap(),
                             usize::try_from(t.end).unwrap(),
                             text,
                         )
                         .to_lowercase()
-                            != property.to_lowercase()
-                        {
-                            println!("token text does not match property value");
-                            return None;
-                        }
+                            != property.value.as_ref().unwrap().to_lowercase()
+                    {
+                        return None;
+                    }
 
-                        // if the furthest child location is set, and it is smaller than the start of the token,
-                        // we can safely ignore this token, because it is not a child of the node
-                        if furthest_child_location.is_some()
-                            && furthest_child_location.unwrap() < t.start as i32
-                        {
-                            println!("furthest child location is smaller than token start");
-                            return None;
-                        }
+                    // if the furthest child location is set, and it is smaller than the start of the token,
+                    // we can safely ignore this token, because it is not a child of the node
+                    if furthest_child_location.is_some()
+                        && furthest_child_location.unwrap() < t.start as i32
+                    {
+                        return None;
+                    }
 
-                        // if the token is before the nearest parent location, we can safely ignore it
-                        // if not, we calculate the distance to the nearest parent location
-                        let distance = t.start - nearest_parent_location;
-                        if distance >= 0 {
-                            println!("distance {} for token {:#?}", distance, t);
-                            Some((distance, t))
-                        } else {
-                            println!("distance is smaller than 0 for token {:#?}", t);
-                            None
-                        }
-                    })
-                    // and use the token with the smallest distance to the nearest parent location
-                    .min_by_key(|(d, _)| d.to_owned())
-                    .map(|(_, t)| t)
-                    .unwrap(),
-            );
+                    // if the token is before the nearest parent location, we can safely ignore it
+                    // if not, we calculate the distance to the nearest parent location
+                    let distance = t.start - nearest_parent_location;
+                    if distance >= 0 {
+                        Some((distance, t))
+                    } else {
+                        None
+                    }
+                })
+                // and use the token with the smallest distance to the nearest parent location
+                .min_by_key(|(d, _)| d.to_owned())
+                .map(|(_, t)| t);
+
+            if token.is_none() {
+                panic!(
+                    "No matching token found for property {:?} in {:#?}",
+                    property, tokens
+                );
+            }
+
+            child_tokens.push(token.unwrap());
         };
 
         match &n.node {
             NodeEnum::RangeVar(n) => {
-                find_token(n.relname.to_owned());
+                get_token(TokenProperty::from_string(&n.relname));
             }
             NodeEnum::Integer(n) => {
-                find_token(n.ival.to_owned().to_string());
+                get_token(TokenProperty::from_int(&n.ival));
             }
             NodeEnum::AConst(n) => {
                 if n.isnull {
-                    find_token("null".to_string());
+                    get_token(TokenProperty::from_token(Token::NullP));
                 }
             }
             NodeEnum::ResTarget(n) => {
                 if n.name.len() > 0 {
-                    find_token(n.name.to_owned());
+                    get_token(TokenProperty::from_string(&n.name));
                 }
             }
-            NodeEnum::SelectStmt(n) => {
-                find_token("select".to_string());
+            NodeEnum::SelectStmt(_) => {
+                get_token(TokenProperty::from_token(Token::Select));
             }
             _ => panic!("Node type not implemented: {:?}", n.node),
         };
@@ -145,7 +182,6 @@ pub fn estimate_node_range(
         };
 
         // For `to`, it’s the larger value of the end of the last direkt child token, and the end of all children ranges.
-        println!("{}: {:?}", n.path, n.node);
         let end_of_last_child_token = if child_tokens.len() > 0 {
             Some(child_tokens.iter().max_by_key(|t| t.end).unwrap().end)
         } else {
@@ -236,6 +272,9 @@ fn get_nearest_parent_location(n: &Node, children: &Vec<Node>) -> i32 {
 
 #[cfg(test)]
 mod tests {
+    use cstree::text::{TextRange, TextSize};
+    use pg_query::NodeEnum;
+
     use crate::estimate_node_range::estimate_node_range;
     use crate::get_nodes_codegen::get_nodes;
 
@@ -266,6 +305,37 @@ mod tests {
 
         let ranged_nodes = estimate_node_range(&mut nodes, &pg_query_tokens, &input);
 
-        dbg!(&ranged_nodes);
+        assert!(ranged_nodes
+            .iter()
+            .find(
+                |n| n.range == TextRange::new(TextSize::from(0), TextSize::from(11))
+                    && match &n.inner.node {
+                        NodeEnum::SelectStmt(_) => true,
+                        _ => false,
+                    }
+            )
+            .is_some());
+
+        assert!(ranged_nodes
+            .iter()
+            .find(
+                |n| n.range == TextRange::new(TextSize::from(7), TextSize::from(11))
+                    && match &n.inner.node {
+                        NodeEnum::ResTarget(_) => true,
+                        _ => false,
+                    }
+            )
+            .is_some());
+
+        assert!(ranged_nodes
+            .iter()
+            .find(
+                |n| n.range == TextRange::new(TextSize::from(7), TextSize::from(11))
+                    && match &n.inner.node {
+                        NodeEnum::AConst(_) => true,
+                        _ => false,
+                    }
+            )
+            .is_some());
     }
 }

From 52a660c9c16e0673590d8654c5f5b4254c6e3b5e Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Tue, 3 Oct 2023 17:41:05 +0200
Subject: [PATCH 07/16] fix: statement parser

---
 crates/parser/src/estimate_node_range.rs |  13 +-
 crates/parser/src/statement_parser.rs    | 170 +++--------------------
 2 files changed, 26 insertions(+), 157 deletions(-)

diff --git a/crates/parser/src/estimate_node_range.rs b/crates/parser/src/estimate_node_range.rs
index addc6408..fce88a89 100644
--- a/crates/parser/src/estimate_node_range.rs
+++ b/crates/parser/src/estimate_node_range.rs
@@ -215,10 +215,15 @@ pub fn estimate_node_range(
             panic!("No child tokens or children ranges found for node {:?}", n);
         };
 
-        ranged_nodes.push(RangedNode {
-            inner: n.to_owned(),
-            range: TextRange::new(TextSize::from(from as u32), TextSize::from(to as u32)),
-        });
+        // TODO: validate that prepending is enough to ensure that `ranged_nodes` is sorted by
+        // range.start
+        ranged_nodes.insert(
+            0,
+            RangedNode {
+                inner: n.to_owned(),
+                range: TextRange::new(TextSize::from(from as u32), TextSize::from(to as u32)),
+            },
+        );
     });
 
     ranged_nodes
diff --git a/crates/parser/src/statement_parser.rs b/crates/parser/src/statement_parser.rs
index e1888998..6d0187fa 100644
--- a/crates/parser/src/statement_parser.rs
+++ b/crates/parser/src/statement_parser.rs
@@ -164,9 +164,6 @@ impl Parser {
                     self.token(kind, text.as_str());
                 }
 
-                // apply the token
-                self.token(SyntaxKind::new_from_pg_query_token(token), text);
-
                 // consume all nodes that start at or before the token ends
                 while pg_query_nodes.peek().is_some()
                     && pg_query_nodes.peek().unwrap().range.start()
@@ -181,6 +178,12 @@ impl Parser {
                     ));
                 }
 
+                // apply the token
+                self.token(
+                    SyntaxKind::new_from_pg_query_token(token),
+                    token_text.as_str(),
+                );
+
                 token_text.len() as i32
             } else {
                 // fallback to statement token
@@ -213,162 +216,23 @@ impl Parser {
     }
 }
 
-// impl Parser {
-//     /// The main entry point for parsing a statement `text`. `at_offset` is the offset of the statement in the source file.
-//     ///
-//     /// On a high level, the algorithm works as follows:
-//     /// 1. Parse the statement with pg_query.rs. If the statement contains syntax errors, the parser will report the error and continue to work without information
-//     ///   about the nodes. The result will be a flat list of tokens under the generic `Stmt` node.
-//     ///   If successful, the first node in the ordered list will be the main node of the statement,
-//     ///   and serves as a root node.
-//     /// 2. Scan the statements for tokens with pg_query.rs. This will never fail, even if the statement contains syntax errors.
-//     /// 3. Parse the statement with the `StatementToken` lexer. The lexer only contains the tokens
-//     ///    that are not parsed by pg_query.rs, such as whitespace.
-//     /// 4. Define a pointer that starts at 0 and move it along the statement.
-//     ///    - first, check if the current pointer is within a pg_query token. If so, consume the
-//     ///    token.
-//     ///    - if not, consume the next token from the `StatementToken` lexer.
-//     /// 5. Close all open nodes for that statement.
-//     pub fn parse_statement(&mut self, text: &str, at_offset: Option<u32>) {
-//         let offset = at_offset.unwrap_or(0);
-//         let range = TextRange::new(
-//             TextSize::from(offset),
-//             TextSize::from(offset + text.len() as u32),
-//         );
-//
-//         let mut pg_query_tokens = match pg_query::scan(text) {
-//             Ok(scanned) => scanned.tokens,
-//             Err(e) => {
-//                 self.error(e.to_string(), range);
-//                 Vec::new()
-//             }
-//         };
-//
-//         // Get root node with depth 1
-//         // Since we are parsing only a single statement there can be only a single node at depth 1
-//         let pg_query_root = match pg_query::parse(text) {
-//             Ok(parsed) => Some(
-//                 parsed
-//                     .protobuf
-//                     .nodes()
-//                     .iter()
-//                     .find(|n| n.1 == 1)
-//                     .unwrap()
-//                     .0
-//                     .to_enum(),
-//             ),
-//             Err(e) => {
-//                 self.error(e.to_string(), range);
-//                 None
-//             }
-//         };
-//
-//         let mut pg_query_nodes = match &pg_query_root {
-//             Some(root) => resolve_tokens(
-//                 &get_nodes(root, text.to_string(), 1),
-//                 &pg_query_tokens,
-//                 &text,
-//             )
-//             .into_iter()
-//             .peekable(),
-//             None => Vec::new().into_iter().peekable(),
-//         };
-//
-//         let mut pg_query_tokens = pg_query_tokens.iter().peekable();
-//
-//         let mut lexer = StatementToken::lexer(&text);
-//
-//         // parse root node if no syntax errors
-//         if pg_query_root.is_some() {
-//             let root_node = pg_query_root.unwrap();
-//             self.stmt(root_node.to_owned(), range);
-//             self.start_node_at(SyntaxKind::new_from_pg_query_node(&root_node), 1);
-//         } else {
-//             // fallback to generic node as root
-//             self.start_node_at(SyntaxKind::Stmt, 1);
-//         }
-//         self.set_checkpoint();
-//
-//         // start at 0, and increment by the length of the token
-//         let mut pointer: i32 = 0;
-//
-//         #[derive(Debug)]
-//         struct Token {
-//             syntax_kind: SyntaxKind,
-//             span: Span,
-//         }
-//
-//         while pointer < text.len() as i32 {
-//             // Check if the pointer is within a pg_query token
-//             let next_pg_query_token = pg_query_tokens.peek();
-//             let token = if next_pg_query_token.is_some()
-//                 && next_pg_query_token.unwrap().start <= pointer
-//                 && pointer <= next_pg_query_token.unwrap().end
-//             {
-//                 let token = pg_query_tokens.next().unwrap();
-//                 Token {
-//                     syntax_kind: SyntaxKind::new_from_pg_query_token(&token),
-//                     span: Span {
-//                         start: token.start as usize,
-//                         end: token.end as usize,
-//                     },
-//                 }
-//             } else {
-//                 // fallback to statement token
-//
-//                 // move statement token lexer to before pointer
-//                 while (lexer.span().end as i32) < pointer {
-//                     lexer.next();
-//                 }
-//                 let token = lexer.next();
-//                 if token.is_none() || (lexer.span().start as i32) != pointer {
-//                     // if the token is not at the pointer, we have a syntax error
-//                     panic!(
-//                         "Expected token for '{}' at offset {}",
-//                         lexer.slice(),
-//                         lexer.span().start
-//                     );
-//                 }
-//                 Token {
-//                     syntax_kind: token.unwrap().unwrap().syntax_kind(),
-//                     span: lexer.span(),
-//                 }
-//             };
-//
-//             self.token(
-//                 token.syntax_kind,
-//                 text.chars()
-//                     .skip(token.span.start)
-//                     .take(token.span.end - token.span.start)
-//                     .collect::<String>()
-//                     .as_str(),
-//             );
-//
-//             pointer = pointer + (token.span.end - token.span.start) as i32;
-//         }
-//
-//         // close up nodes
-//         self.close_checkpoint();
-//     }
-// }
-
 #[cfg(test)]
 mod tests {
     use std::assert_eq;
 
     use super::*;
 
-    // #[test]
-    // fn test_invalid_statement() {
-    //     let input = "select select;";
-    //
-    //     let mut parser = Parser::new();
-    //     parser.parse_statement(input, None);
-    //     let parsed = parser.finish();
-    //
-    //     assert_eq!(parsed.cst.text(), input);
-    // }
-    //
+    #[test]
+    fn test_statement() {
+        let input = "select null;";
+
+        let mut parser = Parser::new();
+        parser.parse_statement_at(input, None);
+        let parsed = parser.finish();
+        // round trip: the text of the resulting CST must equal the input
+        assert_eq!(parsed.cst.text(), input);
+    }
+
     // #[test]
     // fn test_create_sql_function() {
     //     let input = "CREATE FUNCTION dup(in int, out f1 int, out f2 text)

From 789673ba5678004b67a54337f9a169ce6176c99d Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Tue, 3 Oct 2023 19:50:09 +0200
Subject: [PATCH 08/16] feat: codegen for get_child_tokens

---
 crates/codegen/src/get_child_tokens.rs        | 253 ++++++++++++++++++
 crates/codegen/src/lib.rs                     |   7 +
 crates/parser/src/estimate_node_range.rs      | 123 +--------
 crates/parser/src/get_child_tokens_codegen.rs |   3 +
 crates/parser/src/lib.rs                      |   1 +
 crates/parser/src/statement_parser.rs         |   8 +-
 6 files changed, 275 insertions(+), 120 deletions(-)
 create mode 100644 crates/codegen/src/get_child_tokens.rs
 create mode 100644 crates/parser/src/get_child_tokens_codegen.rs

diff --git a/crates/codegen/src/get_child_tokens.rs b/crates/codegen/src/get_child_tokens.rs
new file mode 100644
index 00000000..ce386882
--- /dev/null
+++ b/crates/codegen/src/get_child_tokens.rs
@@ -0,0 +1,253 @@
+use pg_query_proto_parser::{FieldType, Node, ProtoParser};
+use proc_macro2::{Ident, TokenStream};
+use quote::{format_ident, quote};
+
+pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::TokenStream {
+    let parser = ProtoParser::new("./libpg_query/protobuf/pg_query.proto");
+
+    let proto_file = parser.parse();
+
+    let node_identifiers = node_identifiers(&proto_file.nodes);
+    let node_handlers = node_handlers(&proto_file.nodes);
+
+    quote! {
+        use pg_query::{protobuf::ScanToken, protobuf::Token, NodeEnum};
+
+        #[derive(Debug)]
+        struct TokenProperty {
+            value: Option<String>,
+            token: Option<Token>,
+        }
+
+        impl From<i32> for TokenProperty {
+            fn from(value: i32) -> TokenProperty {
+                TokenProperty {
+                    value: Some(value.to_string()),
+                    token: None,
+                }
+            }
+        }
+
+        impl From<u32> for TokenProperty {
+            fn from(value: u32) -> TokenProperty {
+                TokenProperty {
+                    value: Some(value.to_string()),
+                    token: None,
+                }
+            }
+        }
+
+
+        impl From<i64> for TokenProperty {
+            fn from(value: i64) -> TokenProperty {
+                TokenProperty {
+                    value: Some(value.to_string()),
+                    token: None,
+                }
+            }
+        }
+
+        impl From<u64> for TokenProperty {
+            fn from(value: u64) -> TokenProperty {
+                TokenProperty {
+                    value: Some(value.to_string()),
+                    token: None,
+                }
+            }
+        }
+
+        impl From<f64> for TokenProperty {
+            fn from(value: f64) -> TokenProperty {
+                TokenProperty {
+                    value: Some(value.to_string()),
+                    token: None,
+                }
+            }
+        }
+
+        impl From<bool> for TokenProperty {
+            fn from(value: bool) -> TokenProperty {
+                TokenProperty {
+                    value: Some(value.to_string()),
+                    token: None,
+                }
+            }
+        }
+
+        impl From<String> for TokenProperty {
+            fn from(value: String) -> TokenProperty {
+                assert!(value.len() > 0, "String property value has length 0");
+                TokenProperty {
+                    value: Some(value),
+                    token: None,
+                }
+            }
+        }
+
+
+        impl From<&pg_query::protobuf::Integer> for TokenProperty {
+            fn from(node: &pg_query::protobuf::Integer) -> TokenProperty {
+                TokenProperty {
+                        value: Some(node.ival.to_string()),
+                        token: Some(Token::Iconst)
+                    }
+            }
+        }
+
+        impl From<&pg_query::protobuf::Boolean> for TokenProperty {
+            fn from(node: &pg_query::protobuf::Boolean) -> TokenProperty {
+                TokenProperty {
+                        value: Some(node.boolval.to_string()),
+                        token: match node.boolval {
+                            true => Some(Token::TrueP),
+                            false => Some(Token::FalseP),
+                        }
+                    }
+            }
+        }
+
+        impl From<Token> for TokenProperty {
+            fn from(token: Token) -> TokenProperty {
+                TokenProperty {
+                    value: None,
+                    token: Some(token),
+                }
+            }
+        }
+
+        fn get_token_text(start: usize, end: usize, text: &str) -> String {
+            text.chars()
+                .skip(start)
+                .take(end - start)
+                .collect::<String>()
+        }
+
+
+        pub fn get_child_tokens<'tokens>(node: &NodeEnum, tokens: &'tokens Vec<ScanToken>, text: &str, nearest_parent_location: i32, furthest_child_location: Option<i32>) -> Vec<&'tokens ScanToken> {
+            let mut child_tokens = Vec::new();
+
+            let mut get_token = |property: TokenProperty| {
+                let token = tokens
+                    .iter()
+                    .filter_map(|t| {
+                        if property.token.is_some() {
+                            // if a token is set, we can safely ignore all tokens that are not of the same type
+                            if t.token() != property.token.unwrap() {
+                                return None;
+                            }
+                        }
+                        // make a string comparison of the text of the token and the property value
+                        if property.value.is_some()
+                            && get_token_text(
+                                usize::try_from(t.start).unwrap(),
+                                usize::try_from(t.end).unwrap(),
+                                text,
+                            )
+                            .to_lowercase()
+                                != property.value.as_ref().unwrap().to_lowercase()
+                        {
+                            return None;
+                        }
+
+                        // if the furthest child location is set, and it is smaller than the start of the token,
+                        // we can safely ignore this token, because it is not a child of the node
+                        if furthest_child_location.is_some()
+                            && furthest_child_location.unwrap() < t.start as i32
+                        {
+                            return None;
+                        }
+
+                        // if the token is before the nearest parent location, we can safely ignore it
+                        // if not, we calculate the distance to the nearest parent location
+                        let distance = t.start - nearest_parent_location;
+                        if distance >= 0 {
+                            Some((distance, t))
+                        } else {
+                            None
+                        }
+                    })
+                    // and use the token with the smallest distance to the nearest parent location
+                    .min_by_key(|(d, _)| d.to_owned())
+                    .map(|(_, t)| t);
+
+                if token.is_none() {
+                    panic!(
+                        "No matching token found for property {:?} in {:#?}",
+                        property, tokens
+                    );
+                }
+
+                child_tokens.push(token.unwrap());
+            };
+
+            match node {
+                #(NodeEnum::#node_identifiers(n) => {#node_handlers}),*,
+            };
+
+            child_tokens
+        }
+    }
+}
+
+fn node_identifiers(nodes: &[Node]) -> Vec<Ident> {
+    nodes
+        .iter()
+        .map(|node| format_ident!("{}", &node.name))
+        .collect()
+}
+
+fn node_handlers(nodes: &[Node]) -> Vec<TokenStream> {
+    nodes
+        .iter()
+        .map(|node| {
+            let string_property_handlers = string_property_handlers(&node);
+            let custom_handlers = custom_handlers(&node);
+            quote! {
+                #custom_handlers
+                #(#string_property_handlers)*
+            }
+        })
+        .collect()
+}
+
+fn custom_handlers(node: &Node) -> TokenStream {
+    match node.name.as_str() {
+        "SelectStmt" => quote! {
+            get_token(TokenProperty::from(Token::Select));
+        },
+        "Integer" => quote! {
+            get_token(TokenProperty::from(n));
+        },
+        "Boolean" => quote! {
+            get_token(TokenProperty::from(n));
+        },
+        "AConst" => quote! {
+            if n.isnull {
+                get_token(TokenProperty::from(Token::NullP));
+            }
+        },
+        _ => quote! {},
+    }
+}
+
+fn string_property_handlers(node: &Node) -> Vec<TokenStream> {
+    node.fields
+        .iter()
+        .filter_map(|field| {
+            if field.repeated {
+                return None;
+            }
+            let field_name = format_ident!("{}", field.name.as_str());
+            match field.field_type {
+                // just handle string values for now
+                FieldType::String => Some(quote! {
+                    // most string values are never None, but an empty string
+                    if n.#field_name.len() > 0 {
+                        get_token(TokenProperty::from(n.#field_name.to_owned()));
+                    }
+                }),
+                _ => None,
+            }
+        })
+        .collect()
+}
diff --git a/crates/codegen/src/lib.rs b/crates/codegen/src/lib.rs
index bc63d4f6..b91b9789 100644
--- a/crates/codegen/src/lib.rs
+++ b/crates/codegen/src/lib.rs
@@ -1,11 +1,18 @@
+mod get_child_tokens;
 mod get_location;
 mod get_nodes;
 mod syntax_kind;
 
+use get_child_tokens::get_child_tokens_mod;
 use get_location::get_location_mod;
 use get_nodes::get_nodes_mod;
 use syntax_kind::syntax_kind_mod;
 
+#[proc_macro]
+pub fn get_child_tokens(item: proc_macro::TokenStream) -> proc_macro::TokenStream {
+    get_child_tokens_mod(item.into()).into()
+}
+
 #[proc_macro]
 pub fn get_nodes(item: proc_macro::TokenStream) -> proc_macro::TokenStream {
     get_nodes_mod(item.into()).into()
diff --git a/crates/parser/src/estimate_node_range.rs b/crates/parser/src/estimate_node_range.rs
index fce88a89..8d461c87 100644
--- a/crates/parser/src/estimate_node_range.rs
+++ b/crates/parser/src/estimate_node_range.rs
@@ -1,5 +1,6 @@
 use std::cmp::{max, min};
 
+use crate::get_child_tokens_codegen::get_child_tokens;
 use crate::get_location_codegen::get_location;
 use crate::get_nodes_codegen::Node;
 use cstree::text::{TextRange, TextSize};
@@ -29,114 +30,13 @@ pub fn estimate_node_range(
         let nearest_parent_location = get_nearest_parent_location(&n, nodes);
         let furthest_child_location = get_furthest_child_location(&n, nodes);
 
-        let mut child_tokens = Vec::new();
-
-        #[derive(Debug)]
-        struct TokenProperty {
-            value: Option<String>,
-            token: Option<Token>,
-        }
-
-        impl TokenProperty {
-            fn from_int(value: &i32) -> TokenProperty {
-                TokenProperty {
-                    value: Some(value.to_string()),
-                    token: None,
-                }
-            }
-
-            fn from_string(value: &String) -> TokenProperty {
-                assert!(value.len() > 0, "String property value has length 0");
-                TokenProperty {
-                    value: Some(value.to_owned()),
-                    token: None,
-                }
-            }
-
-            fn from_token(token: Token) -> TokenProperty {
-                TokenProperty {
-                    value: None,
-                    token: Some(token),
-                }
-            }
-        }
-
-        let mut get_token = |property: TokenProperty| {
-            let token = tokens
-                .iter()
-                .filter_map(|t| {
-                    if property.token.is_some() {
-                        // if a token is set, we can safely ignore all tokens that are not of the same type
-                        if t.token() != property.token.unwrap() {
-                            return None;
-                        }
-                    }
-                    // make a string comparison of the text of the token and the property value
-                    if property.value.is_some()
-                        && get_token_text(
-                            usize::try_from(t.start).unwrap(),
-                            usize::try_from(t.end).unwrap(),
-                            text,
-                        )
-                        .to_lowercase()
-                            != property.value.as_ref().unwrap().to_lowercase()
-                    {
-                        return None;
-                    }
-
-                    // if the furthest child location is set, and it is smaller than the start of the token,
-                    // we can safely ignore this token, because it is not a child of the node
-                    if furthest_child_location.is_some()
-                        && furthest_child_location.unwrap() < t.start as i32
-                    {
-                        return None;
-                    }
-
-                    // if the token is before the nearest parent location, we can safely ignore it
-                    // if not, we calculate the distance to the nearest parent location
-                    let distance = t.start - nearest_parent_location;
-                    if distance >= 0 {
-                        Some((distance, t))
-                    } else {
-                        None
-                    }
-                })
-                // and use the token with the smallest distance to the nearest parent location
-                .min_by_key(|(d, _)| d.to_owned())
-                .map(|(_, t)| t);
-
-            if token.is_none() {
-                panic!(
-                    "No matching token found for property {:?} in {:#?}",
-                    property, tokens
-                );
-            }
-
-            child_tokens.push(token.unwrap());
-        };
-
-        match &n.node {
-            NodeEnum::RangeVar(n) => {
-                get_token(TokenProperty::from_string(&n.relname));
-            }
-            NodeEnum::Integer(n) => {
-                get_token(TokenProperty::from_int(&n.ival));
-            }
-            NodeEnum::AConst(n) => {
-                if n.isnull {
-                    get_token(TokenProperty::from_token(Token::NullP));
-                }
-            }
-            NodeEnum::ResTarget(n) => {
-                if n.name.len() > 0 {
-                    get_token(TokenProperty::from_string(&n.name));
-                }
-            }
-            NodeEnum::SelectStmt(_) => {
-                get_token(TokenProperty::from_token(Token::Select));
-            }
-            _ => panic!("Node type not implemented: {:?}", n.node),
-        };
+        let child_tokens = get_child_tokens(
+            &n.node,
+            tokens,
+            text,
+            nearest_parent_location,
+            furthest_child_location,
+        );
 
         // For `from`, the location of the node itself is always correct.
         // If not available, the closest estimation is the smaller value of the start of the first direct child token,
@@ -229,13 +129,6 @@ pub fn estimate_node_range(
     ranged_nodes
 }
 
-fn get_token_text(start: usize, end: usize, text: &str) -> String {
-    text.chars()
-        .skip(start)
-        .take(end - start)
-        .collect::<String>()
-}
-
 fn get_furthest_child_location(c: &Node, children: &Vec<Node>) -> Option<i32> {
     children
         .iter()
diff --git a/crates/parser/src/get_child_tokens_codegen.rs b/crates/parser/src/get_child_tokens_codegen.rs
new file mode 100644
index 00000000..22430efb
--- /dev/null
+++ b/crates/parser/src/get_child_tokens_codegen.rs
@@ -0,0 +1,3 @@
+use codegen::get_child_tokens;
+
+get_child_tokens!();
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index 3bc10622..b2975eaf 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -17,6 +17,7 @@
 
 mod ast_node;
 mod estimate_node_range;
+mod get_child_tokens_codegen;
 mod get_location_codegen;
 mod get_nodes_codegen;
 mod parser;
diff --git a/crates/parser/src/statement_parser.rs b/crates/parser/src/statement_parser.rs
index 6d0187fa..9df7c50e 100644
--- a/crates/parser/src/statement_parser.rs
+++ b/crates/parser/src/statement_parser.rs
@@ -1,12 +1,10 @@
-use std::{collections::VecDeque, iter::Peekable};
+use std::collections::VecDeque;
 
 use cstree::text::{TextRange, TextSize};
 use logos::Logos;
 
 use crate::{
-    estimate_node_range::{estimate_node_range, RangedNode},
-    get_nodes_codegen::get_nodes,
-    parser::Parser,
+    estimate_node_range::estimate_node_range, get_nodes_codegen::get_nodes, parser::Parser,
     syntax_kind_codegen::SyntaxKind,
 };
 
@@ -224,7 +222,7 @@ mod tests {
 
     #[test]
     fn test_statement() {
-        let input = "select null;";
+        let input = "select 1;";
 
         let mut parser = Parser::new();
         parser.parse_statement_at(input, None);

From 3ee8664f5dd73d8a8f910626bcd4c280164347bb Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Wed, 4 Oct 2023 18:18:32 +0200
Subject: [PATCH 09/16] fix: minor fixes while making tests green

---
 crates/codegen/src/get_child_tokens.rs   | 58 ++++++++++++++++--------
 crates/parser/src/estimate_node_range.rs | 58 ++++++++++++++----------
 crates/parser/src/source_parser.rs       | 16 +++++--
 3 files changed, 84 insertions(+), 48 deletions(-)

diff --git a/crates/codegen/src/get_child_tokens.rs b/crates/codegen/src/get_child_tokens.rs
index ce386882..7d33ef63 100644
--- a/crates/codegen/src/get_child_tokens.rs
+++ b/crates/codegen/src/get_child_tokens.rs
@@ -78,7 +78,7 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
             fn from(value: String) -> TokenProperty {
                 assert!(value.len() > 0, "String property value has length 0");
                 TokenProperty {
-                    value: Some(value),
+                    value: Some(value.to_lowercase()),
                     token: None,
                 }
             }
@@ -115,11 +115,24 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
             }
         }
 
-        fn get_token_text(start: usize, end: usize, text: &str) -> String {
+        fn get_token_text(token: &ScanToken ,text: &str) -> String {
+            let start = usize::try_from(token.start).unwrap();
+            let end = usize::try_from(token.end).unwrap();
             text.chars()
                 .skip(start)
                 .take(end - start)
                 .collect::<String>()
+                .to_lowercase()
+        }
+
+        /// returns a list of aliases for a string. primarily used for data types.
+        ///
+        /// list from https://www.postgresql.org/docs/current/datatype.html
+        fn aliases(text: &str) -> Vec<&str> {
+            match text {
+                "integer" | "int" | "int4" => vec!["integer", "int", "int4"],
+                _ => vec![text],
+            }
         }
 
 
@@ -136,17 +149,19 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
                                 return None;
                             }
                         }
+
                         // make a string comparison of the text of the token and the property value
-                        if property.value.is_some()
-                            && get_token_text(
-                                usize::try_from(t.start).unwrap(),
-                                usize::try_from(t.end).unwrap(),
-                                text,
-                            )
-                            .to_lowercase()
-                                != property.value.as_ref().unwrap().to_lowercase()
-                        {
-                            return None;
+                        if property.value.is_some() {
+                            let mut token_text = get_token_text(t, text);
+                            // if token is Sconst, remove leading and trailing quotes
+                            if t.token() == Token::Sconst {
+                                let string_delimiter: &[char; 2] = &['\'', '$'];
+                                token_text = token_text.trim_start_matches(string_delimiter).trim_end_matches(string_delimiter).to_string();
+                            }
+
+                            if !aliases(property.value.as_ref().unwrap()).contains(&token_text.as_str()) {
+                                return None;
+                            }
                         }
 
                         // if the furthest child location is set, and it is smaller than the start of the token,
@@ -170,14 +185,16 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
                     .min_by_key(|(d, _)| d.to_owned())
                     .map(|(_, t)| t);
 
-                if token.is_none() {
-                    panic!(
-                        "No matching token found for property {:?} in {:#?}",
-                        property, tokens
-                    );
-                }
+                // if token.is_none() {
+                //     panic!(
+                //         "No matching token found for property {:#?} of node {:#?} in {:#?} with tokens {:#?}",
+                //         property, node, text, tokens
+                //     );
+                // }
 
-                child_tokens.push(token.unwrap());
+                if token.is_some() {
+                    child_tokens.push(token.unwrap());
+                }
             };
 
             match node {
@@ -221,6 +238,9 @@ fn custom_handlers(node: &Node) -> TokenStream {
         "Boolean" => quote! {
             get_token(TokenProperty::from(n));
         },
+        "AStar" => quote! {
+            get_token(TokenProperty::from(Token::Ascii42));
+        },
         "AConst" => quote! {
             if n.isnull {
                 get_token(TokenProperty::from(Token::NullP));
diff --git a/crates/parser/src/estimate_node_range.rs b/crates/parser/src/estimate_node_range.rs
index 8d461c87..e601fab5 100644
--- a/crates/parser/src/estimate_node_range.rs
+++ b/crates/parser/src/estimate_node_range.rs
@@ -4,6 +4,7 @@ use crate::get_child_tokens_codegen::get_child_tokens;
 use crate::get_location_codegen::get_location;
 use crate::get_nodes_codegen::Node;
 use cstree::text::{TextRange, TextSize};
+use log::debug;
 use pg_query::{protobuf::ScanToken, protobuf::Token, NodeEnum};
 
 #[derive(Debug, Clone)]
@@ -42,20 +43,23 @@ pub fn estimate_node_range(
         // If not available, the closest estimation is the smaller value of the start of the first direct child token,
         // and the start of all children ranges. If neither is available, let’s panic for now.
         // The parent location as a fallback should never be required, because any node must have either children with tokens, or a token itself.
+        let children_ranges = ranged_nodes
+            .iter()
+            .filter(|x| x.inner.path.starts_with(n.path.as_str()))
+            .collect::<Vec<&RangedNode>>();
         let location = get_location(&n.node);
         let from = if location.is_some() {
-            location.unwrap()
+            Some(location.unwrap())
         } else {
             let start_of_first_child_token = if child_tokens.len() > 0 {
                 Some(child_tokens.iter().min_by_key(|t| t.start).unwrap().start)
             } else {
                 None
             };
-            let start_of_all_children_ranges = if ranged_nodes.len() > 0 {
+            let start_of_all_children_ranges = if children_ranges.len() > 0 {
                 Some(
-                    ranged_nodes
+                    children_ranges
                         .iter()
-                        .filter(|x| x.inner.path.starts_with(n.path.as_str()))
                         .min_by_key(|n| n.range.start())
                         .unwrap()
                         .range
@@ -67,17 +71,18 @@ pub fn estimate_node_range(
 
             if start_of_first_child_token.is_some() {
                 if start_of_all_children_ranges.is_some() {
-                    min(
+                    Some(min(
                         start_of_first_child_token.unwrap(),
                         u32::from(start_of_all_children_ranges.unwrap()) as i32,
-                    )
+                    ))
                 } else {
-                    start_of_first_child_token.unwrap()
+                    Some(start_of_first_child_token.unwrap())
                 }
             } else if start_of_all_children_ranges.is_some() {
-                u32::from(start_of_all_children_ranges.unwrap()) as i32
+                Some(u32::from(start_of_all_children_ranges.unwrap()) as i32)
             } else {
-                panic!("No location or child tokens found for node {:?}", n);
+                debug!("No location or child tokens found for node {:?}", n);
+                None
             }
         };
 
@@ -87,11 +92,10 @@ pub fn estimate_node_range(
         } else {
             None
         };
-        let end_of_all_children_ranges = if ranged_nodes.len() > 0 {
+        let end_of_all_children_ranges = if children_ranges.len() > 0 {
             Some(
-                ranged_nodes
+                children_ranges
                     .iter()
-                    .filter(|x| x.inner.path.starts_with(n.path.as_str()))
                     .max_by_key(|n| n.range.end())
                     .unwrap()
                     .range
@@ -102,30 +106,34 @@ pub fn estimate_node_range(
         };
         let to = if end_of_last_child_token.is_some() {
             if end_of_all_children_ranges.is_some() {
-                max(
+                Some(max(
                     end_of_last_child_token.unwrap(),
                     u32::from(end_of_all_children_ranges.unwrap()) as i32,
-                )
+                ))
             } else {
-                end_of_last_child_token.unwrap()
+                Some(end_of_last_child_token.unwrap())
             }
         } else if end_of_all_children_ranges.is_some() {
-            u32::from(end_of_all_children_ranges.unwrap()) as i32
+            Some(u32::from(end_of_all_children_ranges.unwrap()) as i32)
         } else {
-            panic!("No child tokens or children ranges found for node {:?}", n);
+            debug!("No child tokens or children ranges found for node {:?}", n);
+            None
         };
 
-        // TODO: validate that prepending is enough to ensure that `ranged_nodes` is sorted by
-        // range.start
-        ranged_nodes.insert(
-            0,
-            RangedNode {
+        if from.is_some() && to.is_some() {
+            ranged_nodes.push(RangedNode {
                 inner: n.to_owned(),
-                range: TextRange::new(TextSize::from(from as u32), TextSize::from(to as u32)),
-            },
-        );
+                range: TextRange::new(
+                    TextSize::from(from.unwrap() as u32),
+                    TextSize::from(to.unwrap() as u32),
+                ),
+            });
+        }
     });
 
+    // sort by start of range, and then by depth
+    ranged_nodes.sort_by_key(|i| (i.range.start(), i.inner.depth));
+
     ranged_nodes
 }
 
diff --git a/crates/parser/src/source_parser.rs b/crates/parser/src/source_parser.rs
index 341d6eb9..98e4c91f 100644
--- a/crates/parser/src/source_parser.rs
+++ b/crates/parser/src/source_parser.rs
@@ -93,10 +93,10 @@ impl Parser {
                     self.token(SyntaxKind::Newline, token.text.as_str());
                 }
                 SourceFileToken::Statement => {
-                    // self.parse_statement(
-                    //     token.text.as_str(),
-                    //     Some(offset + u32::from(token.span.start())),
-                    // );
+                    self.parse_statement_at(
+                        token.text.as_str(),
+                        Some(offset + u32::from(token.span.start())),
+                    );
                 }
             };
         }
@@ -110,6 +110,10 @@ impl Parser {
 mod tests {
     use super::*;
 
+    fn init() {
+        let _ = env_logger::builder().is_test(true).try_init();
+    }
+
     #[test]
     fn test_source_file_lexer() {
         let input = "select * from contact where id = '123';\n\n-- test comment\n\nselect wrong statement;\n\nselect id,username from contact\n\nselect id,name\nfrom contact -- test inline comment\nwhere id = '123';\n\n";
@@ -145,6 +149,8 @@ mod tests {
 
     #[test]
     fn test_source_file_parser() {
+        init();
+
         let input = "select id, name from users where id = '1224';
 
 select select;
@@ -166,6 +172,8 @@ select 1;
 
     #[test]
     fn test_lexer_with_nested_statements() {
+        init();
+
         let input = "select * from test;
 
 select 123;

From 16c3dcc068177fc04ef36cfde1dac0eb5af4f589 Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Wed, 4 Oct 2023 18:20:18 +0200
Subject: [PATCH 10/16] chore: cleanup

---
 crates/codegen/src/get_child_tokens.rs | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/crates/codegen/src/get_child_tokens.rs b/crates/codegen/src/get_child_tokens.rs
index 7d33ef63..cf124f8f 100644
--- a/crates/codegen/src/get_child_tokens.rs
+++ b/crates/codegen/src/get_child_tokens.rs
@@ -11,6 +11,7 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
     let node_handlers = node_handlers(&proto_file.nodes);
 
     quote! {
+        use log::{debug};
         use pg_query::{protobuf::ScanToken, protobuf::Token, NodeEnum};
 
         #[derive(Debug)]
@@ -185,15 +186,13 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
                     .min_by_key(|(d, _)| d.to_owned())
                     .map(|(_, t)| t);
 
-                // if token.is_none() {
-                //     panic!(
-                //         "No matching token found for property {:#?} of node {:#?} in {:#?} with tokens {:#?}",
-                //         property, node, text, tokens
-                //     );
-                // }
-
                 if token.is_some() {
                     child_tokens.push(token.unwrap());
+                } else {
+                    debug!(
+                        "No matching token found for property {:#?} of node {:#?} in {:#?} with tokens {:#?}",
+                        property, node, text, tokens
+                    );
                 }
             };
 

From 3e5f488e54521c3025e3c386417d0ed59ba4cc41 Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Thu, 5 Oct 2023 21:09:05 +0200
Subject: [PATCH 11/16] feat: bring back sibling token logic

---
 crates/parser/src/lib.rs                      |  1 +
 crates/parser/src/parser.rs                   |  4 ++
 crates/parser/src/sibling_token.rs            | 42 +++++++++++
 crates/parser/src/statement_parser.rs         | 42 +++++++++--
 .../snapshots/statements/valid/0001.snap.new  | 70 +++++++++++++++++++
 5 files changed, 154 insertions(+), 5 deletions(-)
 create mode 100644 crates/parser/src/sibling_token.rs
 create mode 100644 crates/parser/tests/snapshots/statements/valid/0001.snap.new

diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index b2975eaf..1bb1c53a 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -21,6 +21,7 @@ mod get_child_tokens_codegen;
 mod get_location_codegen;
 mod get_nodes_codegen;
 mod parser;
+mod sibling_token;
 mod source_parser;
 mod statement_parser;
 mod syntax_error;
diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs
index 2f311ba5..06cd9d85 100644
--- a/crates/parser/src/parser.rs
+++ b/crates/parser/src/parser.rs
@@ -1,5 +1,6 @@
 use cstree::syntax::ResolvedNode;
 use cstree::{build::GreenNodeBuilder, text::TextRange};
+use log::debug;
 use pg_query::NodeEnum;
 
 use crate::ast_node::RawStmt;
@@ -40,16 +41,19 @@ impl Parser {
 
     /// start a new node of `SyntaxKind`
     pub fn start_node(&mut self, kind: SyntaxKind) {
+        debug!("start_node: {:?}", kind);
         self.inner.start_node(kind);
     }
 
     /// finish current node
     pub fn finish_node(&mut self) {
+        debug!("finish_node");
         self.inner.finish_node();
     }
 
     /// applies token
     pub fn token(&mut self, kind: SyntaxKind, text: &str) {
+        debug!("token: {:?} {:?}", kind, text);
         self.inner.token(kind, text);
     }
 
diff --git a/crates/parser/src/sibling_token.rs b/crates/parser/src/sibling_token.rs
new file mode 100644
index 00000000..39ad0993
--- /dev/null
+++ b/crates/parser/src/sibling_token.rs
@@ -0,0 +1,42 @@
+use crate::syntax_kind_codegen::SyntaxKind;
+
+const SIBLINGS: [(SyntaxKind, SyntaxKind); 1] = [(SyntaxKind::Ascii40, SyntaxKind::Ascii41)];
+
+impl SyntaxKind {
+    pub fn is_closing_sibling(self) -> bool {
+        SIBLINGS.iter().any(|(_, close)| *close == self)
+    }
+
+    pub fn is_opening_sibling(self) -> bool {
+        SIBLINGS.iter().any(|(open, _)| *open == self)
+    }
+
+    pub fn get_closing_sibling(self) -> SyntaxKind {
+        SIBLINGS
+            .iter()
+            .find_map(|(open, close)| if *open == self { Some(*close) } else { None })
+            .unwrap()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::assert_eq;
+
+    use super::*;
+
+    #[test]
+    fn test_siblings() {
+        assert_eq!(SyntaxKind::Ascii40.is_opening_sibling(), true);
+        assert_eq!(
+            SyntaxKind::Ascii40.get_closing_sibling(),
+            SyntaxKind::Ascii41
+        );
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_mismatched_siblings() {
+        SyntaxKind::Ascii41.get_closing_sibling();
+    }
+}
diff --git a/crates/parser/src/statement_parser.rs b/crates/parser/src/statement_parser.rs
index 9df7c50e..dae142e8 100644
--- a/crates/parser/src/statement_parser.rs
+++ b/crates/parser/src/statement_parser.rs
@@ -99,6 +99,8 @@ impl Parser {
         let mut token_buffer: VecDeque<(SyntaxKind, String)> = VecDeque::new();
         // Keeps track of currently open nodes. Latest opened is last.
         let mut open_nodes: Vec<(SyntaxKind, TextRange, i32)> = Vec::new();
+        // List of (SyntaxKind, depth) to keep track of currently open sibling tokens and their depths. Latest opened is last.
+        let mut open_tokens: Vec<(SyntaxKind, i32)> = Vec::new();
 
         // 3. Parse the statement
 
@@ -140,6 +142,7 @@ impl Parser {
                 && pointer <= next_pg_query_token.unwrap().end
             {
                 let token = pg_query_tokens.next().unwrap();
+                let token_syntax_kind = SyntaxKind::new_from_pg_query_token(token);
 
                 let token_text = text
                     .chars()
@@ -149,9 +152,18 @@ impl Parser {
 
                 // a node can only start and end with a pg_query token, so we can handle them here
 
+                // if closing token, close nodes until depth of opening token before applying it
+                let target_depth = if token_syntax_kind.is_closing_sibling() {
+                    Some(open_tokens.last().unwrap().1)
+                } else {
+                    None
+                };
+
                 // before applying the token, close any node that ends before the token starts
                 while open_nodes.last().is_some()
                     && open_nodes.last().unwrap().1.end() <= TextSize::from(token.start as u32)
+                    && (target_depth.is_none()
+                        || open_nodes.last().unwrap().2 > target_depth.unwrap())
                 {
                     self.finish_node();
                     open_nodes.pop();
@@ -176,11 +188,12 @@ impl Parser {
                     ));
                 }
 
-                // apply the token
-                self.token(
-                    SyntaxKind::new_from_pg_query_token(token),
-                    token_text.as_str(),
-                );
+                // apply the token to the cst
+                self.token(token_syntax_kind, token_text.as_str());
+                // save the token as an opening sibling token, if it is one
+                if token_syntax_kind.is_opening_sibling() {
+                    open_tokens.push((token_syntax_kind, open_nodes.last().unwrap().2));
+                }
 
                 token_text.len() as i32
             } else {
@@ -220,8 +233,14 @@ mod tests {
 
     use super::*;
 
+    fn init() {
+        let _ = env_logger::builder().is_test(true).try_init();
+    }
+
     #[test]
     fn test_statement() {
+        init();
+
         let input = "select 1;";
 
         let mut parser = Parser::new();
@@ -231,6 +250,19 @@ mod tests {
         assert_eq!(parsed.cst.text(), input);
     }
 
+    #[test]
+    fn test_sibling_tokens() {
+        init();
+
+        let input = "SELECT city, count(*) FILTER (WHERE temp_lo < 45), max(temp_lo) FROM weather GROUP BY city;";
+
+        let mut parser = Parser::new();
+        parser.parse_statement_at(input, None);
+        let parsed = parser.finish();
+
+        assert_eq!(parsed.cst.text(), input);
+    }
+
     // #[test]
     // fn test_create_sql_function() {
     //     let input = "CREATE FUNCTION dup(in int, out f1 int, out f2 text)
diff --git a/crates/parser/tests/snapshots/statements/valid/0001.snap.new b/crates/parser/tests/snapshots/statements/valid/0001.snap.new
new file mode 100644
index 00000000..469b0b71
--- /dev/null
+++ b/crates/parser/tests/snapshots/statements/valid/0001.snap.new
@@ -0,0 +1,70 @@
+---
+source: crates/parser/tests/statement_parser_test.rs
+assertion_line: 36
+description: "SELECT city, count(*) FILTER (WHERE temp_lo < 45), max(temp_lo)\n    FROM weather\n    GROUP BY city;\n"
+---
+SelectStmt@0..102
+  Select@0..6 "SELECT"
+  Whitespace@6..7 " "
+  ResTarget@7..11
+    ColumnRef@7..11
+      String@7..11
+        Ident@7..11 "city"
+  Ascii44@11..12 ","
+  Whitespace@12..13 " "
+  ResTarget@13..49
+    FuncCall@13..49
+      String@13..18
+        Ident@13..18 "count"
+      Ascii40@18..19 "("
+      Ascii42@19..20 "*"
+      Ascii41@20..21 ")"
+      Whitespace@21..22 " "
+      Filter@22..28 "FILTER"
+      Whitespace@28..29 " "
+      Ascii40@29..30 "("
+      Where@30..35 "WHERE"
+      Whitespace@35..36 " "
+      AExpr@36..48
+        ColumnRef@36..43
+          String@36..43
+            Ident@36..43 "temp_lo"
+        Whitespace@43..44 " "
+        String@44..45
+          Ascii60@44..45 "<"
+        Whitespace@45..46 " "
+        AConst@46..48
+          Integer@46..48
+            Iconst@46..48 "45"
+      Ascii41@48..49 ")"
+  Ascii41@49..50 ")"
+  Ascii44@50..51 ","
+  Whitespace@51..52 " "
+  ResTarget@52..65
+    FuncCall@52..65
+      String@52..55
+        Ident@52..55 "max"
+      ColumnRef@55..64
+        String@55..64
+          Ascii40@55..56 "("
+          Ident@56..63 "temp_lo"
+          Ascii41@63..64 ")"
+      Ascii41@64..65 ")"
+  Ascii41@65..66 ")"
+  Newline@66..67 "\n"
+  Whitespace@67..71 "    "
+  From@71..75 "FROM"
+  Whitespace@75..76 " "
+  RangeVar@76..83
+    Ident@76..83 "weather"
+  Newline@83..84 "\n"
+  Whitespace@84..88 "    "
+  GroupP@88..93 "GROUP"
+  Whitespace@93..94 " "
+  By@94..96 "BY"
+  Whitespace@96..97 " "
+  ColumnRef@97..101
+    String@97..101
+      Ident@97..101 "city"
+  Ascii59@101..102 ";"
+

From ba7fee4cf7037372d40f814aa2d4b62b41b8b770 Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Sat, 7 Oct 2023 09:59:16 +0200
Subject: [PATCH 12/16] fix: minor fixes while making tests green

---
 ...ild_tokens.rs => get_child_token_range.rs} |  70 +++++--
 crates/codegen/src/get_location.rs            |  25 ++-
 crates/codegen/src/get_nodes.rs               |   5 +-
 crates/codegen/src/lib.rs                     |   8 +-
 crates/parser/src/estimate_node_range.rs      |  78 +++-----
 .../src/get_child_token_range_codegen.rs      |   3 +
 crates/parser/src/get_child_tokens_codegen.rs |   3 -
 crates/parser/src/lib.rs                      |   2 +-
 crates/parser/src/sibling_token.rs            |   7 +
 crates/parser/src/statement_parser.rs         | 105 +++++++++-
 .../snapshots/statements/valid/0001.snap      |  69 ++++---
 .../snapshots/statements/valid/0001.snap.new  |  70 -------
 .../snapshots/statements/valid/0002.snap      |   6 +-
 .../snapshots/statements/valid/0003.snap      |  76 ++++---
 .../snapshots/statements/valid/0004.snap      | 103 ++++++----
 .../snapshots/statements/valid/0005.snap      |  22 ++-
 .../snapshots/statements/valid/0006.snap      |   7 +-
 .../snapshots/statements/valid/0007.snap      |  33 ++--
 .../snapshots/statements/valid/0008.snap      |  48 +++--
 .../snapshots/statements/valid/0009.snap      | 185 +++++++++++-------
 .../snapshots/statements/valid/0010.snap      |  44 +++--
 .../snapshots/statements/valid/0011.snap      |  24 ++-
 .../snapshots/statements/valid/0012.snap.new  |  61 ++++++
 .../snapshots/statements/valid/0013.snap.new  |  65 ++++++
 .../snapshots/statements/valid/0014.snap      | 114 ++++++-----
 .../snapshots/statements/valid/0015.snap.new  |  80 ++++++++
 crates/parser/tests/statement_parser_test.rs  |   3 +
 27 files changed, 882 insertions(+), 434 deletions(-)
 rename crates/codegen/src/{get_child_tokens.rs => get_child_token_range.rs} (79%)
 create mode 100644 crates/parser/src/get_child_token_range_codegen.rs
 delete mode 100644 crates/parser/src/get_child_tokens_codegen.rs
 delete mode 100644 crates/parser/tests/snapshots/statements/valid/0001.snap.new
 create mode 100644 crates/parser/tests/snapshots/statements/valid/0012.snap.new
 create mode 100644 crates/parser/tests/snapshots/statements/valid/0013.snap.new
 create mode 100644 crates/parser/tests/snapshots/statements/valid/0015.snap.new

diff --git a/crates/codegen/src/get_child_tokens.rs b/crates/codegen/src/get_child_token_range.rs
similarity index 79%
rename from crates/codegen/src/get_child_tokens.rs
rename to crates/codegen/src/get_child_token_range.rs
index cf124f8f..40c9c4fe 100644
--- a/crates/codegen/src/get_child_tokens.rs
+++ b/crates/codegen/src/get_child_token_range.rs
@@ -2,7 +2,7 @@ use pg_query_proto_parser::{FieldType, Node, ProtoParser};
 use proc_macro2::{Ident, TokenStream};
 use quote::{format_ident, quote};
 
-pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::TokenStream {
+pub fn get_child_token_range_mod(_item: proc_macro2::TokenStream) -> proc_macro2::TokenStream {
     let parser = ProtoParser::new("./libpg_query/protobuf/pg_query.proto");
 
     let proto_file = parser.parse();
@@ -12,7 +12,8 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
 
     quote! {
         use log::{debug};
-        use pg_query::{protobuf::ScanToken, protobuf::Token, NodeEnum};
+        use pg_query::{protobuf::ScanToken, protobuf::Token, NodeEnum, protobuf::SortByDir};
+        use cstree::text::{TextRange, TextSize};
 
         #[derive(Debug)]
         struct TokenProperty {
@@ -126,18 +127,30 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
                 .to_lowercase()
         }
 
+
+        /// list of aliases from https://www.postgresql.org/docs/current/datatype.html
+        const ALIASES: [&[&str]; 2]= [
+            &["integer", "int", "int4"],
+            &["real", "float4"],
+        ];
+
         /// returns a list of aliases for a string. primarily used for data types.
-        ///
-        /// list from https://www.postgresql.org/docs/current/datatype.html
         fn aliases(text: &str) -> Vec<&str> {
-            match text {
-                "integer" | "int" | "int4" => vec!["integer", "int", "int4"],
-                _ => vec![text],
+            for alias in ALIASES {
+                if alias.contains(&text) {
+                    return alias.to_vec();
+                }
             }
+            return vec![text];
         }
 
+        pub struct ChildTokenRange {
+            /// the .start of all child tokens used to estimate the range
+            pub child_token_indices: Vec<i32>,
+            pub range: Option<TextRange>
+        }
 
-        pub fn get_child_tokens<'tokens>(node: &NodeEnum, tokens: &'tokens Vec<ScanToken>, text: &str, nearest_parent_location: i32, furthest_child_location: Option<i32>) -> Vec<&'tokens ScanToken> {
+        pub fn get_child_token_range(node: &NodeEnum, tokens: Vec<&ScanToken>, text: &str, nearest_parent_location: u32) -> ChildTokenRange {
             let mut child_tokens = Vec::new();
 
             let mut get_token = |property: TokenProperty| {
@@ -165,17 +178,9 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
                             }
                         }
 
-                        // if the furthest child location is set, and it is smaller than the start of the token,
-                        // we can safely ignore this token, because it is not a child of the node
-                        if furthest_child_location.is_some()
-                            && furthest_child_location.unwrap() < t.start as i32
-                        {
-                            return None;
-                        }
-
                         // if the token is before the nearest parent location, we can safely ignore it
                         // if not, we calculate the distance to the nearest parent location
-                        let distance = t.start - nearest_parent_location;
+                        let distance = t.start - nearest_parent_location as i32;
                         if distance >= 0 {
                             Some((distance, t))
                         } else {
@@ -200,7 +205,17 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
                 #(NodeEnum::#node_identifiers(n) => {#node_handlers}),*,
             };
 
-            child_tokens
+            ChildTokenRange {
+                child_token_indices: child_tokens.iter().map(|t| t.start).collect(),
+                range: if child_tokens.len() > 0 {
+                    Some(TextRange::new(
+                        TextSize::from(child_tokens.iter().min_by_key(|t| t.start).unwrap().start as u32),
+                        TextSize::from(child_tokens.iter().max_by_key(|t| t.end).unwrap().end as u32),
+                    ))
+                } else {
+                    None
+                }
+            }
         }
     }
 }
@@ -230,6 +245,9 @@ fn custom_handlers(node: &Node) -> TokenStream {
     match node.name.as_str() {
         "SelectStmt" => quote! {
             get_token(TokenProperty::from(Token::Select));
+            if n.distinct_clause.len() > 0 {
+                get_token(TokenProperty::from(Token::Distinct));
+            }
         },
         "Integer" => quote! {
             get_token(TokenProperty::from(n));
@@ -240,6 +258,22 @@ fn custom_handlers(node: &Node) -> TokenStream {
         "AStar" => quote! {
             get_token(TokenProperty::from(Token::Ascii42));
         },
+        "FuncCall" => quote! {
+            if n.agg_filter.is_some() {
+                get_token(TokenProperty::from(Token::Filter));
+            }
+        },
+        "SortBy" => quote! {
+            get_token(TokenProperty::from(Token::Order));
+            match n.sortby_dir {
+                2 => get_token(TokenProperty::from(Token::Asc)),
+                3 => get_token(TokenProperty::from(Token::Desc)),
+                _ => {}
+            }
+        },
+        "WindowDef" => quote! {
+            get_token(TokenProperty::from(Token::Partition));
+        },
         "AConst" => quote! {
             if n.isnull {
                 get_token(TokenProperty::from(Token::NullP));
diff --git a/crates/codegen/src/get_location.rs b/crates/codegen/src/get_location.rs
index 29c79088..7e8cf5b0 100644
--- a/crates/codegen/src/get_location.rs
+++ b/crates/codegen/src/get_location.rs
@@ -14,27 +14,36 @@ pub fn get_location_mod(_item: proc_macro2::TokenStream) -> proc_macro2::TokenSt
     quote! {
         use pg_query::NodeEnum;
 
-        // Returns the location of a node
-        pub fn get_location(node: &NodeEnum) -> Option<i32> {
+        /// Returns the location of a node
+        pub fn get_location(node: &NodeEnum) -> Option<u32> {
+            let loc = get_location_internal(node);
+            if loc.is_some() {
+                u32::try_from(loc.unwrap()).ok()
+            } else {
+                None
+            }
+        }
+
+        fn get_location_internal(node: &NodeEnum) -> Option<i32> {
             let location = match node {
-                // for some nodes, the location of the node itself is after their childrens location.
+                // for some nodes, the location of the node itself is after their children's location.
                 // we implement the logic for those nodes manually.
                 // if you add one, make sure to add its name to `manual_node_names()`.
                 NodeEnum::BoolExpr(n) => {
                     let a = n.args.iter().min_by(|a, b| {
-                        let loc_a = get_location(&a.node.as_ref().unwrap());
-                        let loc_b = get_location(&b.node.as_ref().unwrap());
+                        let loc_a = get_location_internal(&a.node.as_ref().unwrap());
+                        let loc_b = get_location_internal(&b.node.as_ref().unwrap());
                         loc_a.cmp(&loc_b)
                     });
-                    get_location(&a.unwrap().node.as_ref().unwrap())
+                    get_location_internal(&a.unwrap().node.as_ref().unwrap())
                 },
-                NodeEnum::AExpr(n) => get_location(&n.lexpr.as_ref().unwrap().node.as_ref().unwrap()),
+                NodeEnum::AExpr(n) => get_location_internal(&n.lexpr.as_ref().unwrap().node.as_ref().unwrap()),
                 #(NodeEnum::#node_identifiers(n) => #location_idents),*
             };
             if location.is_some() && location.unwrap() < 0 {
                 None
             } else {
-                location
+               location
             }
         }
     }
diff --git a/crates/codegen/src/get_nodes.rs b/crates/codegen/src/get_nodes.rs
index 26fd5a53..758bf8f7 100644
--- a/crates/codegen/src/get_nodes.rs
+++ b/crates/codegen/src/get_nodes.rs
@@ -101,8 +101,9 @@ fn property_handlers(node: &Node) -> Vec<TokenStream> {
                 Some(quote! {
                     n.#field_name
                         .iter()
-                        .for_each(|x| handle_child(x.node.as_ref().unwrap().to_owned()));
-
+                        .for_each(|x| if x.node.is_some() {
+                            handle_child(x.node.as_ref().unwrap().to_owned());
+                        });
                 })
             } else if field.field_type == FieldType::Node && field.is_one_of == false {
                 if field.node_name == Some("Node".to_owned()) {
diff --git a/crates/codegen/src/lib.rs b/crates/codegen/src/lib.rs
index b91b9789..b935182d 100644
--- a/crates/codegen/src/lib.rs
+++ b/crates/codegen/src/lib.rs
@@ -1,16 +1,16 @@
-mod get_child_tokens;
+mod get_child_token_range;
 mod get_location;
 mod get_nodes;
 mod syntax_kind;
 
-use get_child_tokens::get_child_tokens_mod;
+use get_child_token_range::get_child_token_range_mod;
 use get_location::get_location_mod;
 use get_nodes::get_nodes_mod;
 use syntax_kind::syntax_kind_mod;
 
 #[proc_macro]
-pub fn get_child_tokens(item: proc_macro::TokenStream) -> proc_macro::TokenStream {
-    get_child_tokens_mod(item.into()).into()
+pub fn get_child_token_range(item: proc_macro::TokenStream) -> proc_macro::TokenStream {
+    get_child_token_range_mod(item.into()).into()
 }
 
 #[proc_macro]
diff --git a/crates/parser/src/estimate_node_range.rs b/crates/parser/src/estimate_node_range.rs
index e601fab5..166b2723 100644
--- a/crates/parser/src/estimate_node_range.rs
+++ b/crates/parser/src/estimate_node_range.rs
@@ -1,6 +1,6 @@
 use std::cmp::{max, min};
 
-use crate::get_child_tokens_codegen::get_child_tokens;
+use crate::get_child_token_range_codegen::get_child_token_range;
 use crate::get_location_codegen::get_location;
 use crate::get_nodes_codegen::Node;
 use cstree::text::{TextRange, TextSize};
@@ -21,6 +21,8 @@ pub fn estimate_node_range(
 ) -> Vec<RangedNode> {
     let mut ranged_nodes: Vec<RangedNode> = Vec::new();
 
+    let mut used_tokens: Vec<i32> = Vec::new();
+
     // ensure that all children of any given node are already processed before processing the node itself
     nodes.sort_by(|a, b| b.path.cmp(&a.path));
 
@@ -29,36 +31,34 @@ pub fn estimate_node_range(
     nodes.iter().for_each(|n| {
         // first, get the estimated boundaries of the node based on the `location` property of a node
         let nearest_parent_location = get_nearest_parent_location(&n, nodes);
-        let furthest_child_location = get_furthest_child_location(&n, nodes);
 
-        let child_tokens = get_child_tokens(
+        let child_token_range = get_child_token_range(
             &n.node,
-            tokens,
+            tokens
+                .iter()
+                .filter(|t| !used_tokens.contains(&t.start))
+                .collect(),
             text,
             nearest_parent_location,
-            furthest_child_location,
         );
 
+        used_tokens.extend(child_token_range.child_token_indices);
+
         // For `from`, the location of the node itself is always correct.
         // If not available, the closest estimation is the smaller value of the start of the first direct child token,
         // and the start of all children ranges. If neither is available, let’s panic for now.
         // The parent location as a fallback should never be required, because any node must have either children with tokens, or a token itself.
-        let children_ranges = ranged_nodes
+        let child_node_ranges = ranged_nodes
             .iter()
             .filter(|x| x.inner.path.starts_with(n.path.as_str()))
             .collect::<Vec<&RangedNode>>();
         let location = get_location(&n.node);
         let from = if location.is_some() {
-            Some(location.unwrap())
+            Some(TextSize::from(location.unwrap()))
         } else {
-            let start_of_first_child_token = if child_tokens.len() > 0 {
-                Some(child_tokens.iter().min_by_key(|t| t.start).unwrap().start)
-            } else {
-                None
-            };
-            let start_of_all_children_ranges = if children_ranges.len() > 0 {
+            let start_of_all_children_ranges = if child_node_ranges.len() > 0 {
                 Some(
-                    children_ranges
+                    child_node_ranges
                         .iter()
                         .min_by_key(|n| n.range.start())
                         .unwrap()
@@ -69,17 +69,18 @@ pub fn estimate_node_range(
                 None
             };
 
-            if start_of_first_child_token.is_some() {
+            if child_token_range.range.is_some() {
+                let start_of_first_child_token = child_token_range.range.unwrap().start();
                 if start_of_all_children_ranges.is_some() {
                     Some(min(
-                        start_of_first_child_token.unwrap(),
-                        u32::from(start_of_all_children_ranges.unwrap()) as i32,
+                        start_of_first_child_token,
+                        start_of_all_children_ranges.unwrap(),
                     ))
                 } else {
-                    Some(start_of_first_child_token.unwrap())
+                    Some(start_of_first_child_token)
                 }
             } else if start_of_all_children_ranges.is_some() {
-                Some(u32::from(start_of_all_children_ranges.unwrap()) as i32)
+                Some(start_of_all_children_ranges.unwrap())
             } else {
                 debug!("No location or child tokens found for node {:?}", n);
                 None
@@ -87,14 +88,9 @@ pub fn estimate_node_range(
         };
 
         // For `to`, it’s the larger value of the end of the last direkt child token, and the end of all children ranges.
-        let end_of_last_child_token = if child_tokens.len() > 0 {
-            Some(child_tokens.iter().max_by_key(|t| t.end).unwrap().end)
-        } else {
-            None
-        };
-        let end_of_all_children_ranges = if children_ranges.len() > 0 {
+        let end_of_all_children_ranges = if child_node_ranges.len() > 0 {
             Some(
-                children_ranges
+                child_node_ranges
                     .iter()
                     .max_by_key(|n| n.range.end())
                     .unwrap()
@@ -104,17 +100,18 @@ pub fn estimate_node_range(
         } else {
             None
         };
-        let to = if end_of_last_child_token.is_some() {
+        let to = if child_token_range.range.is_some() {
+            let end_of_last_child_token = child_token_range.range.unwrap().end();
             if end_of_all_children_ranges.is_some() {
                 Some(max(
-                    end_of_last_child_token.unwrap(),
-                    u32::from(end_of_all_children_ranges.unwrap()) as i32,
+                    end_of_last_child_token,
+                    end_of_all_children_ranges.unwrap(),
                 ))
             } else {
-                Some(end_of_last_child_token.unwrap())
+                Some(end_of_last_child_token)
             }
         } else if end_of_all_children_ranges.is_some() {
-            Some(u32::from(end_of_all_children_ranges.unwrap()) as i32)
+            Some(end_of_all_children_ranges.unwrap())
         } else {
             debug!("No child tokens or children ranges found for node {:?}", n);
             None
@@ -123,10 +120,7 @@ pub fn estimate_node_range(
         if from.is_some() && to.is_some() {
             ranged_nodes.push(RangedNode {
                 inner: n.to_owned(),
-                range: TextRange::new(
-                    TextSize::from(from.unwrap() as u32),
-                    TextSize::from(to.unwrap() as u32),
-                ),
+                range: TextRange::new(from.unwrap(), to.unwrap()),
             });
         }
     });
@@ -137,19 +131,7 @@ pub fn estimate_node_range(
     ranged_nodes
 }
 
-fn get_furthest_child_location(c: &Node, children: &Vec<Node>) -> Option<i32> {
-    children
-        .iter()
-        .filter_map(|n| {
-            if !n.path.starts_with(c.path.as_str()) {
-                return None;
-            }
-            get_location(&n.node)
-        })
-        .max()
-}
-
-fn get_nearest_parent_location(n: &Node, children: &Vec<Node>) -> i32 {
+fn get_nearest_parent_location(n: &Node, children: &Vec<Node>) -> u32 {
     // if location is set, return it
     let location = get_location(&n.node);
     if location.is_some() {
diff --git a/crates/parser/src/get_child_token_range_codegen.rs b/crates/parser/src/get_child_token_range_codegen.rs
new file mode 100644
index 00000000..9b90d602
--- /dev/null
+++ b/crates/parser/src/get_child_token_range_codegen.rs
@@ -0,0 +1,3 @@
+use codegen::get_child_token_range;
+
+get_child_token_range!();
diff --git a/crates/parser/src/get_child_tokens_codegen.rs b/crates/parser/src/get_child_tokens_codegen.rs
deleted file mode 100644
index 22430efb..00000000
--- a/crates/parser/src/get_child_tokens_codegen.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-use codegen::get_child_tokens;
-
-get_child_tokens!();
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index 1bb1c53a..493b49cb 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -17,7 +17,7 @@
 
 mod ast_node;
 mod estimate_node_range;
-mod get_child_tokens_codegen;
+mod get_child_token_range_codegen;
 mod get_location_codegen;
 mod get_nodes_codegen;
 mod parser;
diff --git a/crates/parser/src/sibling_token.rs b/crates/parser/src/sibling_token.rs
index 39ad0993..67e2ed71 100644
--- a/crates/parser/src/sibling_token.rs
+++ b/crates/parser/src/sibling_token.rs
@@ -17,6 +17,13 @@ impl SyntaxKind {
             .find_map(|(open, close)| if *open == self { Some(*close) } else { None })
             .unwrap()
     }
+
+    pub fn get_opening_sibling(self) -> SyntaxKind {
+        SIBLINGS
+            .iter()
+            .find_map(|(open, close)| if *close == self { Some(*open) } else { None })
+            .unwrap()
+    }
 }
 
 #[cfg(test)]
diff --git a/crates/parser/src/statement_parser.rs b/crates/parser/src/statement_parser.rs
index dae142e8..89c3c99c 100644
--- a/crates/parser/src/statement_parser.rs
+++ b/crates/parser/src/statement_parser.rs
@@ -1,6 +1,7 @@
 use std::collections::VecDeque;
 
 use cstree::text::{TextRange, TextSize};
+use log::debug;
 use logos::Logos;
 
 use crate::{
@@ -77,6 +78,8 @@ impl Parser {
             }
         };
 
+        debug!("pg_query_root: {:#?}", pg_query_root);
+
         // ranged nodes from pg_query.rs, including the root node
         // the nodes are ordered by starting range, starting with the root node
         let mut pg_query_nodes = match &pg_query_root {
@@ -154,7 +157,15 @@ impl Parser {
 
                 // if closing token, close nodes until depth of opening token before applying it
                 let target_depth = if token_syntax_kind.is_closing_sibling() {
-                    Some(open_tokens.last().unwrap().1)
+                    let opening_token = open_tokens.pop().unwrap();
+                    assert_eq!(
+                        opening_token.0.get_closing_sibling(),
+                        token_syntax_kind,
+                        "Opening token {:?} does not match closing token {:?}",
+                        opening_token.0,
+                        token_syntax_kind
+                    );
+                    Some(opening_token.1)
                 } else {
                     None
                 };
@@ -174,10 +185,10 @@ impl Parser {
                     self.token(kind, text.as_str());
                 }
 
-                // consume all nodes that start at or before the token ends
+                // consume all nodes that start before the token ends
                 while pg_query_nodes.peek().is_some()
                     && pg_query_nodes.peek().unwrap().range.start()
-                        <= TextSize::from(token.end as u32)
+                        < TextSize::from(token.end as u32)
                 {
                     let node = pg_query_nodes.next().unwrap();
                     self.start_node(SyntaxKind::new_from_pg_query_node(&node.inner.node));
@@ -263,6 +274,94 @@ mod tests {
         assert_eq!(parsed.cst.text(), input);
     }
 
+    #[test]
+    fn test_opening_token() {
+        init();
+
+        let input = "INSERT INTO weather VALUES ('San Francisco', 46, 50, 0.25, '1994-11-27');";
+
+        let mut parser = Parser::new();
+        parser.parse_statement_at(input, None);
+        let parsed = parser.finish();
+
+        assert_eq!(parsed.cst.text(), input);
+    }
+
+    #[test]
+    fn test_closing_token_at_last_position() {
+        init();
+
+        let input = "CREATE TABLE weather (
+        city      varchar(80) references cities(name),
+        temp_lo   int
+);";
+
+        let mut parser = Parser::new();
+        parser.parse_statement_at(input, None);
+        let parsed = parser.finish();
+
+        assert_eq!(parsed.cst.text(), input);
+    }
+
+    #[test]
+    fn test_select_with_alias() {
+        init();
+
+        let input = "SELECT w1.temp_lo AS low, w1.temp_hi AS high FROM weather";
+
+        let mut parser = Parser::new();
+        parser.parse_statement_at(input, None);
+        let parsed = parser.finish();
+
+        assert_eq!(parsed.cst.text(), input);
+    }
+
+    #[test]
+    fn test_select_distinct() {
+        init();
+
+        let input = "SELECT DISTINCT city
+    FROM weather
+    ORDER BY city;";
+
+        let mut parser = Parser::new();
+        parser.parse_statement_at(input, None);
+        let parsed = parser.finish();
+
+        assert_eq!(parsed.cst.text(), input);
+    }
+
+    #[test]
+    fn test_order_by() {
+        init();
+
+        let input = "SELECT sum(salary) OVER w, avg(salary) OVER w
+  FROM empsalary
+  WINDOW w AS (PARTITION BY depname ORDER BY salary DESC);";
+
+        let mut parser = Parser::new();
+        parser.parse_statement_at(input, None);
+        let parsed = parser.finish();
+
+        assert_eq!(parsed.cst.text(), input);
+    }
+
+    #[test]
+    fn test_fn_call() {
+        init();
+
+        let input =
+            "SELECT count(*) FILTER (WHERE i < 5) AS filtered FROM generate_series(1,10) AS s(i);";
+
+        let mut parser = Parser::new();
+        parser.parse_statement_at(input, None);
+        let parsed = parser.finish();
+
+        dbg!(&parsed.cst);
+
+        assert_eq!(parsed.cst.text(), input);
+    }
+
     // #[test]
     // fn test_create_sql_function() {
     //     let input = "CREATE FUNCTION dup(in int, out f1 int, out f2 text)
diff --git a/crates/parser/tests/snapshots/statements/valid/0001.snap b/crates/parser/tests/snapshots/statements/valid/0001.snap
index 1c2083ab..b1bcd7cd 100644
--- a/crates/parser/tests/snapshots/statements/valid/0001.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0001.snap
@@ -2,46 +2,65 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "SELECT city, count(*) FILTER (WHERE temp_lo < 45), max(temp_lo)\n    FROM weather\n    GROUP BY city;\n"
 ---
-SelectStmt@0..100
+SelectStmt@0..99
   Select@0..6 "SELECT"
   Whitespace@6..7 " "
-  Ident@7..11 "city"
+  ResTarget@7..11
+    ColumnRef@7..11
+      String@7..11
+        Ident@7..11 "city"
   Ascii44@11..12 ","
   Whitespace@12..13 " "
-  Ident@13..18 "count"
-  Ascii40@18..19 "("
-  Ascii42@19..20 "*"
-  Ascii41@20..21 ")"
-  Whitespace@21..22 " "
-  Filter@22..28 "FILTER"
-  Whitespace@28..29 " "
-  Ascii40@29..30 "("
-  Where@30..35 "WHERE"
-  Whitespace@35..36 " "
-  Ident@36..43 "temp_lo"
-  Whitespace@43..44 " "
-  Ascii60@44..45 "<"
-  Whitespace@45..46 " "
-  Iconst@46..48 "45"
-  Ascii41@48..49 ")"
+  ResTarget@13..49
+    FuncCall@13..49
+      String@13..18
+        Ident@13..18 "count"
+      Ascii40@18..19 "("
+      Ascii42@19..20 "*"
+      Ascii41@20..21 ")"
+      Whitespace@21..22 " "
+      Filter@22..28 "FILTER"
+      Whitespace@28..29 " "
+      Ascii40@29..30 "("
+      Where@30..35 "WHERE"
+      Whitespace@35..36 " "
+      AExpr@36..48
+        ColumnRef@36..43
+          String@36..43
+            Ident@36..43 "temp_lo"
+        Whitespace@43..44 " "
+        String@44..45
+          Ascii60@44..45 "<"
+        Whitespace@45..46 " "
+        AConst@46..48
+          Integer@46..48
+            Iconst@46..48 "45"
+      Ascii41@48..49 ")"
   Ascii44@49..50 ","
   Whitespace@50..51 " "
-  Ident@51..54 "max"
-  Ascii40@54..55 "("
-  Ident@55..62 "temp_lo"
-  Ascii41@62..63 ")"
+  ResTarget@51..63
+    FuncCall@51..63
+      String@51..54
+        Ident@51..54 "max"
+      Ascii40@54..55 "("
+      ColumnRef@55..62
+        String@55..62
+          Ident@55..62 "temp_lo"
+      Ascii41@62..63 ")"
   Newline@63..64 "\n"
   Whitespace@64..68 "    "
   From@68..72 "FROM"
   Whitespace@72..73 " "
-  Ident@73..80 "weather"
+  RangeVar@73..80
+    Ident@73..80 "weather"
   Newline@80..81 "\n"
   Whitespace@81..85 "    "
   GroupP@85..90 "GROUP"
   Whitespace@90..91 " "
   By@91..93 "BY"
   Whitespace@93..94 " "
-  Ident@94..98 "city"
+  ColumnRef@94..98
+    String@94..98
+      Ident@94..98 "city"
   Ascii59@98..99 ";"
-  Newline@99..100 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0001.snap.new b/crates/parser/tests/snapshots/statements/valid/0001.snap.new
deleted file mode 100644
index 469b0b71..00000000
--- a/crates/parser/tests/snapshots/statements/valid/0001.snap.new
+++ /dev/null
@@ -1,70 +0,0 @@
----
-source: crates/parser/tests/statement_parser_test.rs
-assertion_line: 36
-description: "SELECT city, count(*) FILTER (WHERE temp_lo < 45), max(temp_lo)\n    FROM weather\n    GROUP BY city;\n"
----
-SelectStmt@0..102
-  Select@0..6 "SELECT"
-  Whitespace@6..7 " "
-  ResTarget@7..11
-    ColumnRef@7..11
-      String@7..11
-        Ident@7..11 "city"
-  Ascii44@11..12 ","
-  Whitespace@12..13 " "
-  ResTarget@13..49
-    FuncCall@13..49
-      String@13..18
-        Ident@13..18 "count"
-      Ascii40@18..19 "("
-      Ascii42@19..20 "*"
-      Ascii41@20..21 ")"
-      Whitespace@21..22 " "
-      Filter@22..28 "FILTER"
-      Whitespace@28..29 " "
-      Ascii40@29..30 "("
-      Where@30..35 "WHERE"
-      Whitespace@35..36 " "
-      AExpr@36..48
-        ColumnRef@36..43
-          String@36..43
-            Ident@36..43 "temp_lo"
-        Whitespace@43..44 " "
-        String@44..45
-          Ascii60@44..45 "<"
-        Whitespace@45..46 " "
-        AConst@46..48
-          Integer@46..48
-            Iconst@46..48 "45"
-      Ascii41@48..49 ")"
-  Ascii41@49..50 ")"
-  Ascii44@50..51 ","
-  Whitespace@51..52 " "
-  ResTarget@52..65
-    FuncCall@52..65
-      String@52..55
-        Ident@52..55 "max"
-      ColumnRef@55..64
-        String@55..64
-          Ascii40@55..56 "("
-          Ident@56..63 "temp_lo"
-          Ascii41@63..64 ")"
-      Ascii41@64..65 ")"
-  Ascii41@65..66 ")"
-  Newline@66..67 "\n"
-  Whitespace@67..71 "    "
-  From@71..75 "FROM"
-  Whitespace@75..76 " "
-  RangeVar@76..83
-    Ident@76..83 "weather"
-  Newline@83..84 "\n"
-  Whitespace@84..88 "    "
-  GroupP@88..93 "GROUP"
-  Whitespace@93..94 " "
-  By@94..96 "BY"
-  Whitespace@96..97 " "
-  ColumnRef@97..101
-    String@97..101
-      Ident@97..101 "city"
-  Ascii59@101..102 ";"
-
diff --git a/crates/parser/tests/snapshots/statements/valid/0002.snap b/crates/parser/tests/snapshots/statements/valid/0002.snap
index 45ebc465..ec7822f2 100644
--- a/crates/parser/tests/snapshots/statements/valid/0002.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0002.snap
@@ -2,14 +2,14 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "COPY weather FROM '/home/user/weather.txt';\n"
 ---
-CopyStmt@0..44
+CopyStmt@0..43
   Copy@0..4 "COPY"
   Whitespace@4..5 " "
-  Ident@5..12 "weather"
+  RangeVar@5..12
+    Ident@5..12 "weather"
   Whitespace@12..13 " "
   From@13..17 "FROM"
   Whitespace@17..18 " "
   Sconst@18..42 "'/home/user/weather.txt'"
   Ascii59@42..43 ";"
-  Newline@43..44 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0003.snap b/crates/parser/tests/snapshots/statements/valid/0003.snap
index 090b3a9a..8e53c49f 100644
--- a/crates/parser/tests/snapshots/statements/valid/0003.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0003.snap
@@ -2,55 +2,75 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE TABLE weather (\n        city      varchar(80) references cities(name),\n        temp_lo   int,\n        temp_hi   int,\n        prcp      real,\n        date      date\n);\n"
 ---
-CreateStmt@0..174
+CreateStmt@0..173
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   Table@7..12 "TABLE"
   Whitespace@12..13 " "
-  Ident@13..20 "weather"
+  RangeVar@13..20
+    Ident@13..20 "weather"
   Whitespace@20..21 " "
   Ascii40@21..22 "("
   Newline@22..23 "\n"
   Whitespace@23..31 "        "
-  Ident@31..35 "city"
-  Whitespace@35..41 "      "
-  Varchar@41..48 "varchar"
-  Ascii40@48..49 "("
-  Iconst@49..51 "80"
-  Ascii41@51..52 ")"
-  Whitespace@52..53 " "
-  References@53..63 "references"
-  Whitespace@63..64 " "
-  Ident@64..70 "cities"
-  Ascii40@70..71 "("
-  NameP@71..75 "name"
-  Ascii41@75..76 ")"
+  ColumnDef@31..76
+    Ident@31..35 "city"
+    Whitespace@35..41 "      "
+    TypeName@41..52
+      String@41..48
+        Varchar@41..48 "varchar"
+      Ascii40@48..49 "("
+      AConst@49..51
+        Integer@49..51
+          Iconst@49..51 "80"
+      Ascii41@51..52 ")"
+    Whitespace@52..53 " "
+    Constraint@53..76
+      References@53..63 "references"
+      Whitespace@63..64 " "
+      RangeVar@64..70
+        Ident@64..70 "cities"
+      Ascii40@70..71 "("
+      String@71..75
+        NameP@71..75 "name"
+      Ascii41@75..76 ")"
   Ascii44@76..77 ","
   Newline@77..78 "\n"
   Whitespace@78..86 "        "
-  Ident@86..93 "temp_lo"
-  Whitespace@93..96 "   "
-  IntP@96..99 "int"
+  ColumnDef@86..99
+    Ident@86..93 "temp_lo"
+    Whitespace@93..96 "   "
+    TypeName@96..99
+      String@96..99
+        IntP@96..99 "int"
   Ascii44@99..100 ","
   Newline@100..101 "\n"
   Whitespace@101..109 "        "
-  Ident@109..116 "temp_hi"
-  Whitespace@116..119 "   "
-  IntP@119..122 "int"
+  ColumnDef@109..122
+    Ident@109..116 "temp_hi"
+    Whitespace@116..119 "   "
+    TypeName@119..122
+      String@119..122
+        IntP@119..122 "int"
   Ascii44@122..123 ","
   Newline@123..124 "\n"
   Whitespace@124..132 "        "
-  Ident@132..136 "prcp"
-  Whitespace@136..142 "      "
-  Real@142..146 "real"
+  ColumnDef@132..146
+    Ident@132..136 "prcp"
+    Whitespace@136..142 "      "
+    TypeName@142..146
+      String@142..146
+        Real@142..146 "real"
   Ascii44@146..147 ","
   Newline@147..148 "\n"
   Whitespace@148..156 "        "
-  Ident@156..160 "date"
-  Whitespace@160..166 "      "
-  Ident@166..170 "date"
+  ColumnDef@156..170
+    Ident@156..160 "date"
+    Whitespace@160..166 "      "
+    TypeName@166..170
+      String@166..170
+        Ident@166..170 "date"
   Newline@170..171 "\n"
   Ascii41@171..172 ")"
   Ascii59@172..173 ";"
-  Newline@173..174 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0004.snap b/crates/parser/tests/snapshots/statements/valid/0004.snap
index d5e1524f..aa2ae312 100644
--- a/crates/parser/tests/snapshots/statements/valid/0004.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0004.snap
@@ -2,51 +2,78 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE VIEW myview AS\n    SELECT name, temp_lo, temp_hi, prcp, date, location\n        FROM weather, cities\n        WHERE city = name;\n"
 ---
-ViewStmt@0..134
+ViewStmt@0..133
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   View@7..11 "VIEW"
   Whitespace@11..12 " "
-  Ident@12..18 "myview"
+  RangeVar@12..18
+    Ident@12..18 "myview"
   Whitespace@18..19 " "
   As@19..21 "AS"
   Newline@21..22 "\n"
   Whitespace@22..26 "    "
-  Select@26..32 "SELECT"
-  Whitespace@32..33 " "
-  NameP@33..37 "name"
-  Ascii44@37..38 ","
-  Whitespace@38..39 " "
-  Ident@39..46 "temp_lo"
-  Ascii44@46..47 ","
-  Whitespace@47..48 " "
-  Ident@48..55 "temp_hi"
-  Ascii44@55..56 ","
-  Whitespace@56..57 " "
-  Ident@57..61 "prcp"
-  Ascii44@61..62 ","
-  Whitespace@62..63 " "
-  Ident@63..67 "date"
-  Ascii44@67..68 ","
-  Whitespace@68..69 " "
-  Location@69..77 "location"
-  Newline@77..78 "\n"
-  Whitespace@78..86 "        "
-  From@86..90 "FROM"
-  Whitespace@90..91 " "
-  Ident@91..98 "weather"
-  Ascii44@98..99 ","
-  Whitespace@99..100 " "
-  Ident@100..106 "cities"
-  Newline@106..107 "\n"
-  Whitespace@107..115 "        "
-  Where@115..120 "WHERE"
-  Whitespace@120..121 " "
-  Ident@121..125 "city"
-  Whitespace@125..126 " "
-  Ascii61@126..127 "="
-  Whitespace@127..128 " "
-  NameP@128..132 "name"
+  SelectStmt@26..132
+    Select@26..32 "SELECT"
+    Whitespace@32..33 " "
+    ResTarget@33..37
+      ColumnRef@33..37
+        String@33..37
+          NameP@33..37 "name"
+    Ascii44@37..38 ","
+    Whitespace@38..39 " "
+    ResTarget@39..46
+      ColumnRef@39..46
+        String@39..46
+          Ident@39..46 "temp_lo"
+    Ascii44@46..47 ","
+    Whitespace@47..48 " "
+    ResTarget@48..55
+      ColumnRef@48..55
+        String@48..55
+          Ident@48..55 "temp_hi"
+    Ascii44@55..56 ","
+    Whitespace@56..57 " "
+    ResTarget@57..61
+      ColumnRef@57..61
+        String@57..61
+          Ident@57..61 "prcp"
+    Ascii44@61..62 ","
+    Whitespace@62..63 " "
+    ResTarget@63..67
+      ColumnRef@63..67
+        String@63..67
+          Ident@63..67 "date"
+    Ascii44@67..68 ","
+    Whitespace@68..69 " "
+    ResTarget@69..77
+      ColumnRef@69..77
+        String@69..77
+          Location@69..77 "location"
+    Newline@77..78 "\n"
+    Whitespace@78..86 "        "
+    From@86..90 "FROM"
+    Whitespace@90..91 " "
+    RangeVar@91..98
+      Ident@91..98 "weather"
+    Ascii44@98..99 ","
+    Whitespace@99..100 " "
+    RangeVar@100..106
+      Ident@100..106 "cities"
+    Newline@106..107 "\n"
+    Whitespace@107..115 "        "
+    Where@115..120 "WHERE"
+    Whitespace@120..121 " "
+    AExpr@121..132
+      ColumnRef@121..125
+        String@121..125
+          Ident@121..125 "city"
+      Whitespace@125..126 " "
+      String@126..127
+        Ascii61@126..127 "="
+      Whitespace@127..128 " "
+      ColumnRef@128..132
+        String@128..132
+          NameP@128..132 "name"
   Ascii59@132..133 ";"
-  Newline@133..134 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0005.snap b/crates/parser/tests/snapshots/statements/valid/0005.snap
index d99908db..bb3a6ef7 100644
--- a/crates/parser/tests/snapshots/statements/valid/0005.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0005.snap
@@ -2,20 +2,26 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "DELETE FROM weather WHERE city = 'Hayward';\n"
 ---
-DeleteStmt@0..44
+DeleteStmt@0..43
   DeleteP@0..6 "DELETE"
   Whitespace@6..7 " "
   From@7..11 "FROM"
   Whitespace@11..12 " "
-  Ident@12..19 "weather"
+  RangeVar@12..19
+    Ident@12..19 "weather"
   Whitespace@19..20 " "
   Where@20..25 "WHERE"
   Whitespace@25..26 " "
-  Ident@26..30 "city"
-  Whitespace@30..31 " "
-  Ascii61@31..32 "="
-  Whitespace@32..33 " "
-  Sconst@33..42 "'Hayward'"
+  AExpr@26..42
+    ColumnRef@26..30
+      String@26..30
+        Ident@26..30 "city"
+    Whitespace@30..31 " "
+    String@31..32
+      Ascii61@31..32 "="
+    Whitespace@32..33 " "
+    AConst@33..42
+      String@33..42
+        Sconst@33..42 "'Hayward'"
   Ascii59@42..43 ";"
-  Newline@43..44 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0006.snap b/crates/parser/tests/snapshots/statements/valid/0006.snap
index 37cfd337..81fda79f 100644
--- a/crates/parser/tests/snapshots/statements/valid/0006.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0006.snap
@@ -2,12 +2,13 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "DROP TABLE tablename;\n"
 ---
-DropStmt@0..22
+DropStmt@0..21
   Drop@0..4 "DROP"
   Whitespace@4..5 " "
   Table@5..10 "TABLE"
   Whitespace@10..11 " "
-  Ident@11..20 "tablename"
+  List@11..20
+    String@11..20
+      Ident@11..20 "tablename"
   Ascii59@20..21 ";"
-  Newline@21..22 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0007.snap b/crates/parser/tests/snapshots/statements/valid/0007.snap
index 61ccfb22..f4235d2c 100644
--- a/crates/parser/tests/snapshots/statements/valid/0007.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0007.snap
@@ -2,35 +2,44 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE TABLE cities (\n  name       text,\n  population real,\n  elevation  int     -- (in ft)\n);\n\n"
 ---
-CreateStmt@0..96
+CreateStmt@0..94
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   Table@7..12 "TABLE"
   Whitespace@12..13 " "
-  Ident@13..19 "cities"
+  RangeVar@13..19
+    Ident@13..19 "cities"
   Whitespace@19..20 " "
   Ascii40@20..21 "("
   Newline@21..22 "\n"
   Whitespace@22..24 "  "
-  NameP@24..28 "name"
-  Whitespace@28..35 "       "
-  TextP@35..39 "text"
+  ColumnDef@24..39
+    NameP@24..28 "name"
+    Whitespace@28..35 "       "
+    TypeName@35..39
+      String@35..39
+        TextP@35..39 "text"
   Ascii44@39..40 ","
   Newline@40..41 "\n"
   Whitespace@41..43 "  "
-  Ident@43..53 "population"
-  Whitespace@53..54 " "
-  Real@54..58 "real"
+  ColumnDef@43..58
+    Ident@43..53 "population"
+    Whitespace@53..54 " "
+    TypeName@54..58
+      String@54..58
+        Real@54..58 "real"
   Ascii44@58..59 ","
   Newline@59..60 "\n"
   Whitespace@60..62 "  "
-  Ident@62..71 "elevation"
-  Whitespace@71..73 "  "
-  IntP@73..76 "int"
+  ColumnDef@62..76
+    Ident@62..71 "elevation"
+    Whitespace@71..73 "  "
+    TypeName@73..76
+      String@73..76
+        IntP@73..76 "int"
   Whitespace@76..81 "     "
   SqlComment@81..91 "-- (in ft)"
   Newline@91..92 "\n"
   Ascii41@92..93 ")"
   Ascii59@93..94 ";"
-  Newline@94..96 "\n\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0008.snap b/crates/parser/tests/snapshots/statements/valid/0008.snap
index a33927f9..14a3c516 100644
--- a/crates/parser/tests/snapshots/statements/valid/0008.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0008.snap
@@ -2,41 +2,55 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "INSERT INTO weather (date, city, temp_hi, temp_lo)\n    VALUES ('1994-11-29', 'Hayward', 54, 37);\n"
 ---
-InsertStmt@0..97
+InsertStmt@0..96
   Insert@0..6 "INSERT"
   Whitespace@6..7 " "
   Into@7..11 "INTO"
   Whitespace@11..12 " "
-  Ident@12..19 "weather"
+  RangeVar@12..19
+    Ident@12..19 "weather"
   Whitespace@19..20 " "
   Ascii40@20..21 "("
-  Ident@21..25 "date"
+  ResTarget@21..25
+    Ident@21..25 "date"
   Ascii44@25..26 ","
   Whitespace@26..27 " "
-  Ident@27..31 "city"
+  ResTarget@27..31
+    Ident@27..31 "city"
   Ascii44@31..32 ","
   Whitespace@32..33 " "
-  Ident@33..40 "temp_hi"
+  ResTarget@33..40
+    Ident@33..40 "temp_hi"
   Ascii44@40..41 ","
   Whitespace@41..42 " "
-  Ident@42..49 "temp_lo"
+  ResTarget@42..49
+    Ident@42..49 "temp_lo"
   Ascii41@49..50 ")"
   Newline@50..51 "\n"
   Whitespace@51..55 "    "
   Values@55..61 "VALUES"
   Whitespace@61..62 " "
   Ascii40@62..63 "("
-  Sconst@63..75 "'1994-11-29'"
-  Ascii44@75..76 ","
-  Whitespace@76..77 " "
-  Sconst@77..86 "'Hayward'"
-  Ascii44@86..87 ","
-  Whitespace@87..88 " "
-  Iconst@88..90 "54"
-  Ascii44@90..91 ","
-  Whitespace@91..92 " "
-  Iconst@92..94 "37"
+  SelectStmt@63..94
+    List@63..94
+      AConst@63..75
+        String@63..75
+          Sconst@63..75 "'1994-11-29'"
+      Ascii44@75..76 ","
+      Whitespace@76..77 " "
+      AConst@77..86
+        String@77..86
+          Sconst@77..86 "'Hayward'"
+      Ascii44@86..87 ","
+      Whitespace@87..88 " "
+      AConst@88..90
+        Integer@88..90
+          Iconst@88..90 "54"
+      Ascii44@90..91 ","
+      Whitespace@91..92 " "
+      AConst@92..94
+        Integer@92..94
+          Iconst@92..94 "37"
   Ascii41@94..95 ")"
   Ascii59@95..96 ";"
-  Newline@96..97 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0009.snap b/crates/parser/tests/snapshots/statements/valid/0009.snap
index bf17d60a..fe1cba70 100644
--- a/crates/parser/tests/snapshots/statements/valid/0009.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0009.snap
@@ -2,92 +2,137 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "SELECT w1.city, w1.temp_lo AS low, w1.temp_hi AS high,\n       w2.city, w2.temp_lo AS low, w2.temp_hi AS high\n    FROM weather w1 JOIN weather w2\n        ON w1.temp_lo < w2.temp_lo AND w1.temp_hi > w2.temp_hi;\n"
 ---
-SelectStmt@0..209
+SelectStmt@0..208
   Select@0..6 "SELECT"
   Whitespace@6..7 " "
-  Ident@7..9 "w1"
-  Ascii46@9..10 "."
-  Ident@10..14 "city"
+  ResTarget@7..14
+    ColumnRef@7..14
+      String@7..9
+        Ident@7..9 "w1"
+      Ascii46@9..10 "."
+      String@10..14
+        Ident@10..14 "city"
   Ascii44@14..15 ","
   Whitespace@15..16 " "
-  Ident@16..18 "w1"
-  Ascii46@18..19 "."
-  Ident@19..26 "temp_lo"
-  Whitespace@26..27 " "
-  As@27..29 "AS"
-  Whitespace@29..30 " "
-  Ident@30..33 "low"
+  ResTarget@16..33
+    ColumnRef@16..26
+      String@16..18
+        Ident@16..18 "w1"
+      Ascii46@18..19 "."
+      String@19..26
+        Ident@19..26 "temp_lo"
+    Whitespace@26..27 " "
+    As@27..29 "AS"
+    Whitespace@29..30 " "
+    Ident@30..33 "low"
   Ascii44@33..34 ","
   Whitespace@34..35 " "
-  Ident@35..37 "w1"
-  Ascii46@37..38 "."
-  Ident@38..45 "temp_hi"
-  Whitespace@45..46 " "
-  As@46..48 "AS"
-  Whitespace@48..49 " "
-  Ident@49..53 "high"
+  ResTarget@35..53
+    ColumnRef@35..45
+      String@35..37
+        Ident@35..37 "w1"
+      Ascii46@37..38 "."
+      String@38..45
+        Ident@38..45 "temp_hi"
+    Whitespace@45..46 " "
+    As@46..48 "AS"
+    Whitespace@48..49 " "
+    Ident@49..53 "high"
   Ascii44@53..54 ","
   Newline@54..55 "\n"
   Whitespace@55..62 "       "
-  Ident@62..64 "w2"
-  Ascii46@64..65 "."
-  Ident@65..69 "city"
+  ResTarget@62..69
+    ColumnRef@62..69
+      String@62..64
+        Ident@62..64 "w2"
+      Ascii46@64..65 "."
+      String@65..69
+        Ident@65..69 "city"
   Ascii44@69..70 ","
   Whitespace@70..71 " "
-  Ident@71..73 "w2"
-  Ascii46@73..74 "."
-  Ident@74..81 "temp_lo"
-  Whitespace@81..82 " "
-  As@82..84 "AS"
-  Whitespace@84..85 " "
-  Ident@85..88 "low"
+  ResTarget@71..88
+    ColumnRef@71..81
+      String@71..73
+        Ident@71..73 "w2"
+      Ascii46@73..74 "."
+      String@74..81
+        Ident@74..81 "temp_lo"
+    Whitespace@81..82 " "
+    As@82..84 "AS"
+    Whitespace@84..85 " "
+    Ident@85..88 "low"
   Ascii44@88..89 ","
   Whitespace@89..90 " "
-  Ident@90..92 "w2"
-  Ascii46@92..93 "."
-  Ident@93..100 "temp_hi"
-  Whitespace@100..101 " "
-  As@101..103 "AS"
-  Whitespace@103..104 " "
-  Ident@104..108 "high"
+  ResTarget@90..108
+    ColumnRef@90..100
+      String@90..92
+        Ident@90..92 "w2"
+      Ascii46@92..93 "."
+      String@93..100
+        Ident@93..100 "temp_hi"
+    Whitespace@100..101 " "
+    As@101..103 "AS"
+    Whitespace@103..104 " "
+    Ident@104..108 "high"
   Newline@108..109 "\n"
   Whitespace@109..113 "    "
   From@113..117 "FROM"
   Whitespace@117..118 " "
-  Ident@118..125 "weather"
-  Whitespace@125..126 " "
-  Ident@126..128 "w1"
-  Whitespace@128..129 " "
-  Join@129..133 "JOIN"
-  Whitespace@133..134 " "
-  Ident@134..141 "weather"
-  Whitespace@141..142 " "
-  Ident@142..144 "w2"
-  Newline@144..145 "\n"
-  Whitespace@145..153 "        "
-  On@153..155 "ON"
-  Whitespace@155..156 " "
-  Ident@156..158 "w1"
-  Ascii46@158..159 "."
-  Ident@159..166 "temp_lo"
-  Whitespace@166..167 " "
-  Ascii60@167..168 "<"
-  Whitespace@168..169 " "
-  Ident@169..171 "w2"
-  Ascii46@171..172 "."
-  Ident@172..179 "temp_lo"
-  Whitespace@179..180 " "
-  And@180..183 "AND"
-  Whitespace@183..184 " "
-  Ident@184..186 "w1"
-  Ascii46@186..187 "."
-  Ident@187..194 "temp_hi"
-  Whitespace@194..195 " "
-  Ascii62@195..196 ">"
-  Whitespace@196..197 " "
-  Ident@197..199 "w2"
-  Ascii46@199..200 "."
-  Ident@200..207 "temp_hi"
+  JoinExpr@118..207
+    RangeVar@118..128
+      Ident@118..125 "weather"
+      Whitespace@125..126 " "
+      Alias@126..128
+        Ident@126..128 "w1"
+    Whitespace@128..129 " "
+    Join@129..133 "JOIN"
+    Whitespace@133..134 " "
+    RangeVar@134..144
+      Ident@134..141 "weather"
+      Whitespace@141..142 " "
+      Alias@142..144
+        Ident@142..144 "w2"
+    Newline@144..145 "\n"
+    Whitespace@145..153 "        "
+    On@153..155 "ON"
+    Whitespace@155..156 " "
+    BoolExpr@156..207
+      AExpr@156..179
+        ColumnRef@156..166
+          String@156..158
+            Ident@156..158 "w1"
+          Ascii46@158..159 "."
+          String@159..166
+            Ident@159..166 "temp_lo"
+        Whitespace@166..167 " "
+        String@167..168
+          Ascii60@167..168 "<"
+        Whitespace@168..169 " "
+        ColumnRef@169..179
+          String@169..171
+            Ident@169..171 "w2"
+          Ascii46@171..172 "."
+          String@172..179
+            Ident@172..179 "temp_lo"
+      Whitespace@179..180 " "
+      And@180..183 "AND"
+      Whitespace@183..184 " "
+      AExpr@184..207
+        ColumnRef@184..194
+          String@184..186
+            Ident@184..186 "w1"
+          Ascii46@186..187 "."
+          String@187..194
+            Ident@187..194 "temp_hi"
+        Whitespace@194..195 " "
+        String@195..196
+          Ascii62@195..196 ">"
+        Whitespace@196..197 " "
+        ColumnRef@197..207
+          String@197..199
+            Ident@197..199 "w2"
+          Ascii46@199..200 "."
+          String@200..207
+            Ident@200..207 "temp_hi"
   Ascii59@207..208 ";"
-  Newline@208..209 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0010.snap b/crates/parser/tests/snapshots/statements/valid/0010.snap
index 402b5c07..bef5bc54 100644
--- a/crates/parser/tests/snapshots/statements/valid/0010.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0010.snap
@@ -2,30 +2,42 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "INSERT INTO weather VALUES ('San Francisco', 46, 50, 0.25, '1994-11-27');\n"
 ---
-InsertStmt@0..74
+InsertStmt@0..73
   Insert@0..6 "INSERT"
   Whitespace@6..7 " "
   Into@7..11 "INTO"
   Whitespace@11..12 " "
-  Ident@12..19 "weather"
+  RangeVar@12..19
+    Ident@12..19 "weather"
   Whitespace@19..20 " "
   Values@20..26 "VALUES"
   Whitespace@26..27 " "
   Ascii40@27..28 "("
-  Sconst@28..43 "'San Francisco'"
-  Ascii44@43..44 ","
-  Whitespace@44..45 " "
-  Iconst@45..47 "46"
-  Ascii44@47..48 ","
-  Whitespace@48..49 " "
-  Iconst@49..51 "50"
-  Ascii44@51..52 ","
-  Whitespace@52..53 " "
-  Fconst@53..57 "0.25"
-  Ascii44@57..58 ","
-  Whitespace@58..59 " "
-  Sconst@59..71 "'1994-11-27'"
+  SelectStmt@28..71
+    List@28..71
+      AConst@28..43
+        String@28..43
+          Sconst@28..43 "'San Francisco'"
+      Ascii44@43..44 ","
+      Whitespace@44..45 " "
+      AConst@45..47
+        Integer@45..47
+          Iconst@45..47 "46"
+      Ascii44@47..48 ","
+      Whitespace@48..49 " "
+      AConst@49..51
+        Integer@49..51
+          Iconst@49..51 "50"
+      Ascii44@51..52 ","
+      Whitespace@52..53 " "
+      AConst@53..57
+        Float@53..57
+          Fconst@53..57 "0.25"
+      Ascii44@57..58 ","
+      Whitespace@58..59 " "
+      AConst@59..71
+        String@59..71
+          Sconst@59..71 "'1994-11-27'"
   Ascii41@71..72 ")"
   Ascii59@72..73 ";"
-  Newline@73..74 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0011.snap b/crates/parser/tests/snapshots/statements/valid/0011.snap
index 48d7b674..2f2bb2d8 100644
--- a/crates/parser/tests/snapshots/statements/valid/0011.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0011.snap
@@ -2,24 +2,30 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "SELECT DISTINCT city\n    FROM weather\n    ORDER BY city;\n"
 ---
-SelectStmt@0..57
+SelectStmt@0..56
   Select@0..6 "SELECT"
   Whitespace@6..7 " "
   Distinct@7..15 "DISTINCT"
   Whitespace@15..16 " "
-  Ident@16..20 "city"
+  ResTarget@16..20
+    ColumnRef@16..20
+      String@16..20
+        Ident@16..20 "city"
   Newline@20..21 "\n"
   Whitespace@21..25 "    "
   From@25..29 "FROM"
   Whitespace@29..30 " "
-  Ident@30..37 "weather"
+  RangeVar@30..37
+    Ident@30..37 "weather"
   Newline@37..38 "\n"
   Whitespace@38..42 "    "
-  Order@42..47 "ORDER"
-  Whitespace@47..48 " "
-  By@48..50 "BY"
-  Whitespace@50..51 " "
-  Ident@51..55 "city"
+  SortBy@42..55
+    Order@42..47 "ORDER"
+    Whitespace@47..48 " "
+    By@48..50 "BY"
+    Whitespace@50..51 " "
+    ColumnRef@51..55
+      String@51..55
+        Ident@51..55 "city"
   Ascii59@55..56 ";"
-  Newline@56..57 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0012.snap.new b/crates/parser/tests/snapshots/statements/valid/0012.snap.new
new file mode 100644
index 00000000..f956dd31
--- /dev/null
+++ b/crates/parser/tests/snapshots/statements/valid/0012.snap.new
@@ -0,0 +1,61 @@
+---
+source: crates/parser/tests/statement_parser_test.rs
+assertion_line: 39
+description: "CREATE TABLE measurement_y2008m01 PARTITION OF measurement\n    FOR VALUES FROM ('2008-01-01') TO ('2008-02-01')\n    WITH (parallel_workers = 4)\n    TABLESPACE fasttablespace;\n"
+---
+CreateStmt@0..174
+  Create@0..6 "CREATE"
+  Whitespace@6..7 " "
+  Table@7..12 "TABLE"
+  Whitespace@12..13 " "
+  RangeVar@13..33
+    Ident@13..33 "measurement_y2008m01"
+  Whitespace@33..34 " "
+  Partition@34..43 "PARTITION"
+  Whitespace@43..44 " "
+  Of@44..46 "OF"
+  Whitespace@46..47 " "
+  RangeVar@47..58
+    Ident@47..58 "measurement"
+  Newline@58..59 "\n"
+  Whitespace@59..63 "    "
+  For@63..66 "FOR"
+  Whitespace@66..67 " "
+  Values@67..73 "VALUES"
+  Whitespace@73..74 " "
+  PartitionBoundSpec@74..111
+    From@74..78 "FROM"
+    Whitespace@78..79 " "
+    Ascii40@79..80 "("
+    AConst@80..92
+      String@80..92
+        Sconst@80..92 "'2008-01-01'"
+    Ascii41@92..93 ")"
+    Whitespace@93..94 " "
+    To@94..96 "TO"
+    Whitespace@96..97 " "
+    Ascii40@97..98 "("
+    AConst@98..110
+      String@98..110
+        Sconst@98..110 "'2008-02-01'"
+    Ascii41@110..111 ")"
+  Newline@111..112 "\n"
+  Whitespace@112..116 "    "
+  With@116..120 "WITH"
+  Whitespace@120..121 " "
+  Ascii40@121..122 "("
+  DefElem@122..142
+    Ident@122..138 "parallel_workers"
+    Whitespace@138..139 " "
+    Ascii61@139..140 "="
+    Whitespace@140..141 " "
+    Integer@141..142
+      Iconst@141..142 "4"
+  Ascii41@142..143 ")"
+  Newline@143..144 "\n"
+  Whitespace@144..148 "    "
+  Tablespace@148..158 "TABLESPACE"
+  Whitespace@158..159 " "
+  Ident@159..173 "fasttablespace"
+  Ascii59@173..174 ";"
+
diff --git a/crates/parser/tests/snapshots/statements/valid/0013.snap.new b/crates/parser/tests/snapshots/statements/valid/0013.snap.new
new file mode 100644
index 00000000..2d1f7551
--- /dev/null
+++ b/crates/parser/tests/snapshots/statements/valid/0013.snap.new
@@ -0,0 +1,65 @@
+---
+source: crates/parser/tests/statement_parser_test.rs
+assertion_line: 39
+description: "UPDATE weather\n    SET temp_hi = temp_hi - 2,  temp_lo = temp_lo - 2\n    WHERE date > '1994-11-28';\n"
+---
+UpdateStmt@0..99
+  Update@0..6 "UPDATE"
+  Whitespace@6..7 " "
+  RangeVar@7..14
+    Ident@7..14 "weather"
+  Newline@14..15 "\n"
+  Whitespace@15..19 "    "
+  Set@19..22 "SET"
+  Whitespace@22..23 " "
+  ResTarget@23..44
+    Ident@23..30 "temp_hi"
+    Whitespace@30..31 " "
+    Ascii61@31..32 "="
+    Whitespace@32..33 " "
+    AExpr@33..44
+      ColumnRef@33..40
+        String@33..40
+          Ident@33..40 "temp_hi"
+      Whitespace@40..41 " "
+      String@41..42
+        Ascii45@41..42 "-"
+      Whitespace@42..43 " "
+      AConst@43..44
+        Integer@43..44
+          Iconst@43..44 "2"
+  Ascii44@44..45 ","
+  Whitespace@45..47 "  "
+  ResTarget@47..68
+    Ident@47..54 "temp_lo"
+    Whitespace@54..55 " "
+    Ascii61@55..56 "="
+    Whitespace@56..57 " "
+    AExpr@57..68
+      ColumnRef@57..64
+        String@57..64
+          Ident@57..64 "temp_lo"
+      Whitespace@64..65 " "
+      String@65..66
+        Ascii45@65..66 "-"
+      Whitespace@66..67 " "
+      AConst@67..68
+        Integer@67..68
+          Iconst@67..68 "2"
+  Newline@68..69 "\n"
+  Whitespace@69..73 "    "
+  Where@73..78 "WHERE"
+  Whitespace@78..79 " "
+  AExpr@79..98
+    ColumnRef@79..83
+      String@79..83
+        Ident@79..83 "date"
+    Whitespace@83..84 " "
+    String@84..85
+      Ascii62@84..85 ">"
+    Whitespace@85..86 " "
+    AConst@86..98
+      String@86..98
+        Sconst@86..98 "'1994-11-28'"
+  Ascii59@98..99 ";"
+
diff --git a/crates/parser/tests/snapshots/statements/valid/0014.snap b/crates/parser/tests/snapshots/statements/valid/0014.snap
index 80ceaf6d..490b0142 100644
--- a/crates/parser/tests/snapshots/statements/valid/0014.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0014.snap
@@ -2,55 +2,73 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "SELECT sum(salary) OVER w, avg(salary) OVER w\n  FROM empsalary\n  WINDOW w AS (PARTITION BY depname ORDER BY salary DESC);\n"
 ---
-SelectStmt@0..122
+SelectStmt@0..121
   Select@0..6 "SELECT"
   Whitespace@6..7 " "
-  Ident@7..10 "sum"
-  Ascii40@10..11 "("
-  Ident@11..17 "salary"
-  Ascii41@17..18 ")"
-  Whitespace@18..19 " "
-  Over@19..23 "OVER"
-  Whitespace@23..24 " "
-  Ident@24..25 "w"
-  Ascii44@25..26 ","
-  Whitespace@26..27 " "
-  Ident@27..30 "avg"
-  Ascii40@30..31 "("
-  Ident@31..37 "salary"
-  Ascii41@37..38 ")"
-  Whitespace@38..39 " "
-  Over@39..43 "OVER"
-  Whitespace@43..44 " "
-  Ident@44..45 "w"
-  Newline@45..46 "\n"
-  Whitespace@46..48 "  "
-  From@48..52 "FROM"
-  Whitespace@52..53 " "
-  Ident@53..62 "empsalary"
-  Newline@62..63 "\n"
-  Whitespace@63..65 "  "
-  Window@65..71 "WINDOW"
-  Whitespace@71..72 " "
-  Ident@72..73 "w"
-  Whitespace@73..74 " "
-  As@74..76 "AS"
-  Whitespace@76..77 " "
-  Ascii40@77..78 "("
-  Partition@78..87 "PARTITION"
-  Whitespace@87..88 " "
-  By@88..90 "BY"
-  Whitespace@90..91 " "
-  Ident@91..98 "depname"
-  Whitespace@98..99 " "
-  Order@99..104 "ORDER"
-  Whitespace@104..105 " "
-  By@105..107 "BY"
-  Whitespace@107..108 " "
-  Ident@108..114 "salary"
-  Whitespace@114..115 " "
-  Desc@115..119 "DESC"
-  Ascii41@119..120 ")"
+  ResTarget@7..120
+    FuncCall@7..120
+      String@7..10
+        Ident@7..10 "sum"
+      Ascii40@10..11 "("
+      ColumnRef@11..17
+        String@11..17
+          Ident@11..17 "salary"
+      Ascii41@17..18 ")"
+      Whitespace@18..19 " "
+      Over@19..23 "OVER"
+      Whitespace@23..24 " "
+      WindowDef@24..120
+        Ident@24..25 "w"
+        Ascii44@25..26 ","
+        Whitespace@26..27 " "
+        ResTarget@27..120
+          FuncCall@27..120
+            String@27..30
+              Ident@27..30 "avg"
+            Ascii40@30..31 "("
+            ColumnRef@31..37
+              String@31..37
+                Ident@31..37 "salary"
+            Ascii41@37..38 ")"
+            Whitespace@38..39 " "
+            Over@39..43 "OVER"
+            Whitespace@43..44 " "
+            WindowDef@44..120
+              Ident@44..45 "w"
+              Newline@45..46 "\n"
+              Whitespace@46..48 "  "
+              From@48..52 "FROM"
+              Whitespace@52..53 " "
+              RangeVar@53..62
+                Ident@53..62 "empsalary"
+              Newline@62..63 "\n"
+              Whitespace@63..65 "  "
+              Window@65..71 "WINDOW"
+              Whitespace@71..72 " "
+              Ident@72..73 "w"
+              Whitespace@73..74 " "
+              As@74..76 "AS"
+              Whitespace@76..77 " "
+              WindowDef@77..120
+                Ascii40@77..78 "("
+                Partition@78..87 "PARTITION"
+                Whitespace@87..88 " "
+                By@88..90 "BY"
+                Whitespace@90..91 " "
+                ColumnRef@91..98
+                  String@91..98
+                    Ident@91..98 "depname"
+                Whitespace@98..99 " "
+                SortBy@99..119
+                  Order@99..104 "ORDER"
+                  Whitespace@104..105 " "
+                  By@105..107 "BY"
+                  Whitespace@107..108 " "
+                  ColumnRef@108..114
+                    String@108..114
+                      Ident@108..114 "salary"
+                  Whitespace@114..115 " "
+                  Desc@115..119 "DESC"
+                Ascii41@119..120 ")"
   Ascii59@120..121 ";"
-  Newline@121..122 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0015.snap.new b/crates/parser/tests/snapshots/statements/valid/0015.snap.new
new file mode 100644
index 00000000..89d0aa08
--- /dev/null
+++ b/crates/parser/tests/snapshots/statements/valid/0015.snap.new
@@ -0,0 +1,80 @@
+---
+source: crates/parser/tests/statement_parser_test.rs
+assertion_line: 39
+description: "SELECT\n    count(*) AS unfiltered,\n    count(*) FILTER (WHERE i < 5) AS filtered\nFROM generate_series(1,10) AS s(i);\n"
+---
+SelectStmt@0..116
+  Select@0..6 "SELECT"
+  Newline@6..7 "\n"
+  Whitespace@7..11 "    "
+  ResTarget@11..33
+    FuncCall@11..16
+      String@11..16
+        Ident@11..16 "count"
+    Ascii40@16..17 "("
+    Ascii42@17..18 "*"
+    Ascii41@18..19 ")"
+    Whitespace@19..20 " "
+    As@20..22 "AS"
+    Whitespace@22..23 " "
+    Ident@23..33 "unfiltered"
+  Ascii44@33..34 ","
+  Newline@34..35 "\n"
+  Whitespace@35..39 "    "
+  ResTarget@39..112
+    FuncCall@39..112
+      String@39..44
+        Ident@39..44 "count"
+      Ascii40@44..45 "("
+      Ascii42@45..46 "*"
+      Ascii41@46..47 ")"
+      Whitespace@47..48 " "
+      Filter@48..54 "FILTER"
+      Whitespace@54..55 " "
+      Ascii40@55..56 "("
+      Where@56..61 "WHERE"
+      Whitespace@61..62 " "
+      RangeFunction@62..112
+        Alias@62..112
+          String@62..67
+            AExpr@62..67
+              ColumnRef@62..63
+                String@62..63
+                  Ident@62..63 "i"
+              Whitespace@63..64 " "
+              String@64..65
+                Ascii60@64..65 "<"
+              Whitespace@65..66 " "
+              AConst@66..67
+                Integer@66..67
+                  Iconst@66..67 "5"
+          Ascii41@67..68 ")"
+          Whitespace@68..69 " "
+          As@69..71 "AS"
+          Whitespace@71..72 " "
+          Ident@72..80 "filtered"
+          Newline@80..81 "\n"
+          From@81..85 "FROM"
+          Whitespace@85..86 " "
+          List@86..107
+            FuncCall@86..107
+              String@86..101
+                Ident@86..101 "generate_series"
+              Ascii40@101..102 "("
+              AConst@102..103
+                Integer@102..103
+                  Iconst@102..103 "1"
+              Ascii44@103..104 ","
+              AConst@104..106
+                Integer@104..106
+                  Iconst@104..106 "10"
+              Ascii41@106..107 ")"
+          Whitespace@107..108 " "
+          As@108..110 "AS"
+          Whitespace@110..111 " "
+          Ident@111..112 "s"
+  Ascii40@112..113 "("
+  Ident@113..114 "i"
+  Ascii41@114..115 ")"
+  Ascii59@115..116 ";"
+
diff --git a/crates/parser/tests/statement_parser_test.rs b/crates/parser/tests/statement_parser_test.rs
index 77542df1..7ffbaf56 100644
--- a/crates/parser/tests/statement_parser_test.rs
+++ b/crates/parser/tests/statement_parser_test.rs
@@ -1,6 +1,7 @@
 use std::fs;
 mod common;
 use insta;
+use log::debug;
 use parser::Parser;
 
 const VALID_STATEMENTS_PATH: &str = "tests/data/statements/valid/";
@@ -22,6 +23,8 @@ fn valid_statements() {
 
         let contents = fs::read_to_string(&path).unwrap();
 
+        debug!("Parsing statement: {}", test_name);
+
         let mut parser = Parser::new();
         parser.parse_statement_at(&contents, None);
         let parsed = parser.finish();

From f6cc9b018abb41b007dd947961f05fae2920f46b Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Thu, 12 Oct 2023 22:13:38 +0200
Subject: [PATCH 13/16] fix: minor fixes while making tests green

---
 crates/codegen/src/get_child_token_range.rs   | 110 +++++++---
 crates/parser/src/estimate_node_range.rs      | 190 +++++++++++-------
 crates/parser/src/source_parser.rs            |   2 +-
 crates/parser/src/statement_parser.rs         |  42 ++++
 .../snapshots/statements/valid/0012.snap      |  48 +++--
 .../snapshots/statements/valid/0012.snap.new  |  61 ------
 .../snapshots/statements/valid/0013.snap      |  72 ++++---
 .../snapshots/statements/valid/0013.snap.new  |  65 ------
 .../snapshots/statements/valid/0014.snap      | 112 +++++------
 .../snapshots/statements/valid/0015.snap      | 107 ++++++----
 .../snapshots/statements/valid/0015.snap.new  |  80 --------
 .../snapshots/statements/valid/0016.snap      |  36 ++--
 .../snapshots/statements/valid/0017.snap      |  76 ++++---
 .../snapshots/statements/valid/0018.snap      |  34 ++--
 .../snapshots/statements/valid/0019.snap      |  73 ++++---
 .../snapshots/statements/valid/0020.snap      |  69 ++++---
 .../snapshots/statements/valid/0021.snap      |  59 +++---
 .../snapshots/statements/valid/0022.snap      |  44 ++--
 .../snapshots/statements/valid/0023.snap      | 121 +++++++----
 .../snapshots/statements/valid/0024.snap      |  76 ++++---
 .../snapshots/statements/valid/0025.snap      |  24 ++-
 .../snapshots/statements/valid/0026.snap      |  34 ++--
 .../snapshots/statements/valid/0027.snap      |  12 +-
 .../snapshots/statements/valid/0028.snap      |   6 +-
 .../snapshots/statements/valid/0029.snap      |  33 +--
 .../snapshots/statements/valid/0030.snap      |  24 ++-
 .../snapshots/statements/valid/0031.snap      | 115 ++++++-----
 .../snapshots/statements/valid/0032.snap      |  11 +-
 .../snapshots/statements/valid/0033.snap      |  62 +++---
 .../snapshots/statements/valid/0034.snap      |  32 ++-
 .../snapshots/statements/valid/0035.snap      |  32 ++-
 .../snapshots/statements/valid/0036.snap      |  46 +++--
 32 files changed, 1074 insertions(+), 834 deletions(-)
 delete mode 100644 crates/parser/tests/snapshots/statements/valid/0012.snap.new
 delete mode 100644 crates/parser/tests/snapshots/statements/valid/0013.snap.new
 delete mode 100644 crates/parser/tests/snapshots/statements/valid/0015.snap.new

diff --git a/crates/codegen/src/get_child_token_range.rs b/crates/codegen/src/get_child_token_range.rs
index 40c9c4fe..fed68c2f 100644
--- a/crates/codegen/src/get_child_token_range.rs
+++ b/crates/codegen/src/get_child_token_range.rs
@@ -144,17 +144,22 @@ pub fn get_child_token_range_mod(_item: proc_macro2::TokenStream) -> proc_macro2
             return vec![text];
         }
 
-        pub struct ChildTokenRange {
-            /// the .start of all child tokens used to estimate the range
-            pub child_token_indices: Vec<i32>,
-            pub range: Option<TextRange>
+        #[derive(Debug)]
+        pub enum ChildTokenRangeResult {
+            TooManyTokens,
+            NoTokens,
+            /// indices are the .start of all child tokens used to estimate the range
+            ChildTokenRange { used_token_indices: Vec<i32>, range: TextRange },
         }
 
-        pub fn get_child_token_range(node: &NodeEnum, tokens: Vec<&ScanToken>, text: &str, nearest_parent_location: u32) -> ChildTokenRange {
-            let mut child_tokens = Vec::new();
+        pub fn get_child_token_range(node: &NodeEnum, tokens: Vec<&ScanToken>, text: &str, nearest_parent_location: Option<u32>) -> ChildTokenRangeResult {
+            let mut child_tokens: Vec<&ScanToken> = Vec::new();
+
+            // if true, we found more than one valid token for at least one property of the node
+            let mut has_too_many_tokens: bool = false;
 
             let mut get_token = |property: TokenProperty| {
-                let token = tokens
+                let possible_tokens = tokens
                     .iter()
                     .filter_map(|t| {
                         if property.token.is_some() {
@@ -178,42 +183,59 @@ pub fn get_child_token_range_mod(_item: proc_macro2::TokenStream) -> proc_macro2
                             }
                         }
 
-                        // if the token is before the nearest parent location, we can safely ignore it
-                        // if not, we calculate the distance to the nearest parent location
-                        let distance = t.start - nearest_parent_location as i32;
-                        if distance >= 0 {
-                            Some((distance, t))
-                        } else {
-                            None
-                        }
+                        Some(t)
                     })
-                    // and use the token with the smallest distance to the nearest parent location
-                    .min_by_key(|(d, _)| d.to_owned())
-                    .map(|(_, t)| t);
+                    .collect::<Vec<&&ScanToken>>();
 
-                if token.is_some() {
-                    child_tokens.push(token.unwrap());
-                } else {
+                if possible_tokens.len() == 0 {
                     debug!(
                         "No matching token found for property {:#?} of node {:#?} in {:#?} with tokens {:#?}",
                         property, node, text, tokens
                     );
+                    return;
+                }
+
+                if possible_tokens.len() == 1 {
+                    debug!(
+                        "Found token {:#?} for property {:#?} of node {:#?}",
+                        possible_tokens[0], property, node
+                    );
+                    child_tokens.push(possible_tokens[0]);
+                    return;
                 }
+
+                if nearest_parent_location.is_none() {
+                    debug!("Found {:#?} for property {:#?} and no nearest_parent_location set", possible_tokens, property);
+                    has_too_many_tokens = true;
+                    return;
+                }
+
+                let token = possible_tokens
+                    .iter().map(|t| ((nearest_parent_location.unwrap() as i32 - t.start), t))
+                    .min_by_key(|(d, _)| d.to_owned())
+                    .map(|(_, t)| t);
+
+                debug!("Selected {:#?} as token closest from parent {:#?} as location {:#?}", token.unwrap(), node, nearest_parent_location);
+
+                child_tokens.push(token.unwrap());
             };
 
             match node {
                 #(NodeEnum::#node_identifiers(n) => {#node_handlers}),*,
             };
 
-            ChildTokenRange {
-                child_token_indices: child_tokens.iter().map(|t| t.start).collect(),
-                range: if child_tokens.len() > 0 {
-                    Some(TextRange::new(
+
+            if has_too_many_tokens == true {
+                ChildTokenRangeResult::TooManyTokens
+            } else if child_tokens.len() == 0 {
+                ChildTokenRangeResult::NoTokens
+            } else {
+                ChildTokenRangeResult::ChildTokenRange {
+                    used_token_indices: child_tokens.iter().map(|t| t.start).collect(),
+                    range: TextRange::new(
                         TextSize::from(child_tokens.iter().min_by_key(|t| t.start).unwrap().start as u32),
                         TextSize::from(child_tokens.iter().max_by_key(|t| t.end).unwrap().end as u32),
-                    ))
-                } else {
-                    None
+                    )
                 }
             }
         }
@@ -252,6 +274,13 @@ fn custom_handlers(node: &Node) -> TokenStream {
         "Integer" => quote! {
             get_token(TokenProperty::from(n));
         },
+        "WindowDef" => quote! {
+            if n.partition_clause.len() > 0 {
+                get_token(TokenProperty::from(Token::Window));
+            } else {
+                get_token(TokenProperty::from(Token::Over));
+            }
+        },
         "Boolean" => quote! {
             get_token(TokenProperty::from(n));
         },
@@ -263,6 +292,28 @@ fn custom_handlers(node: &Node) -> TokenStream {
                 get_token(TokenProperty::from(Token::Filter));
             }
         },
+        "SqlvalueFunction" => quote! {
+            match n.op {
+                // 1 SvfopCurrentDate
+                // 2 SvfopCurrentTime
+                // 3 SvfopCurrentTimeN
+                // 4 SvfopCurrentTimestamp
+                // 5 SvfopCurrentTimestampN
+                // 6 SvfopLocaltime
+                // 7 SvfopLocaltimeN
+                // 8 SvfopLocaltimestamp
+                // 9 SvfopLocaltimestampN
+                // 10 SvfopCurrentRole
+                10 => get_token(TokenProperty::from(Token::CurrentRole)),
+                // 11 SvfopCurrentUser
+                11 => get_token(TokenProperty::from(Token::CurrentUser)),
+                // 12 SvfopUser
+                // 13 SvfopSessionUser
+                // 14 SvfopCurrentCatalog
+                // 15 SvfopCurrentSchema
+                _ => panic!("Unknown SqlvalueFunction {:#?}", n.op),
+            }
+        },
         "SortBy" => quote! {
             get_token(TokenProperty::from(Token::Order));
             match n.sortby_dir {
@@ -271,9 +322,6 @@ fn custom_handlers(node: &Node) -> TokenStream {
                 _ => {}
             }
         },
-        "WindowDef" => quote! {
-            get_token(TokenProperty::from(Token::Partition));
-        },
         "AConst" => quote! {
             if n.isnull {
                 get_token(TokenProperty::from(Token::NullP));
diff --git a/crates/parser/src/estimate_node_range.rs b/crates/parser/src/estimate_node_range.rs
index 166b2723..a55ffb65 100644
--- a/crates/parser/src/estimate_node_range.rs
+++ b/crates/parser/src/estimate_node_range.rs
@@ -1,11 +1,10 @@
-use std::cmp::{max, min};
+use std::cmp::max;
 
-use crate::get_child_token_range_codegen::get_child_token_range;
+use crate::get_child_token_range_codegen::{get_child_token_range, ChildTokenRangeResult};
 use crate::get_location_codegen::get_location;
 use crate::get_nodes_codegen::Node;
 use cstree::text::{TextRange, TextSize};
-use log::debug;
-use pg_query::{protobuf::ScanToken, protobuf::Token, NodeEnum};
+use pg_query::protobuf::ScanToken;
 
 #[derive(Debug, Clone)]
 pub struct RangedNode {
@@ -19,73 +18,110 @@ pub fn estimate_node_range(
     tokens: &Vec<ScanToken>,
     text: &str,
 ) -> Vec<RangedNode> {
-    let mut ranged_nodes: Vec<RangedNode> = Vec::new();
-
-    let mut used_tokens: Vec<i32> = Vec::new();
-
     // ensure that all children of any given node are already processed before processing the node itself
     nodes.sort_by(|a, b| b.path.cmp(&a.path));
 
-    // we get an estimated range by searching for tokens that match the node property values
-    // and, if available, the `location` of the node itself
-    nodes.iter().for_each(|n| {
-        // first, get the estimated boundaries of the node based on the `location` property of a node
-        let nearest_parent_location = get_nearest_parent_location(&n, nodes);
+    // first get ranges only from child tokens
+    let mut used_tokens: Vec<i32> = Vec::new();
+    let mut child_token_ranges: Vec<Option<TextRange>> = Vec::new();
+    let mut too_many_tokens_at: Vec<usize> = Vec::new();
 
-        let child_token_range = get_child_token_range(
+    nodes.iter().for_each(|n| {
+        match get_child_token_range(
             &n.node,
             tokens
                 .iter()
                 .filter(|t| !used_tokens.contains(&t.start))
                 .collect(),
             text,
-            nearest_parent_location,
-        );
+            None,
+        ) {
+            ChildTokenRangeResult::TooManyTokens => {
+                too_many_tokens_at.push(nodes.iter().position(|x| x.path == n.path).unwrap());
+                child_token_ranges.push(None);
+            }
+            ChildTokenRangeResult::ChildTokenRange {
+                used_token_indices,
+                range,
+            } => {
+                used_tokens.extend(used_token_indices);
+                child_token_ranges.push(Some(range));
+            }
+            ChildTokenRangeResult::NoTokens => {
+                child_token_ranges.push(None);
+            }
+        };
+    });
+
+    // second iteration using the nearest parent from the first
+    for idx in too_many_tokens_at {
+        // get the nearest parent location
+        let nearest_parent_start =
+            get_nearest_parent_start(&nodes[idx], &nodes, &child_token_ranges);
+        let nearest_parent_location = get_nearest_parent_location(&nodes[idx], &nodes);
+
+        match get_child_token_range(
+            &nodes[idx].node,
+            tokens
+                .iter()
+                .filter(|t| !used_tokens.contains(&t.start))
+                .collect(),
+            text,
+            Some(max(nearest_parent_start, nearest_parent_location)),
+        ) {
+            ChildTokenRangeResult::ChildTokenRange {
+                used_token_indices,
+                range,
+            } => {
+                used_tokens.extend(used_token_indices);
+                child_token_ranges[idx] = Some(range)
+            }
+            _ => {}
+        };
+    }
+
+    let mut ranged_nodes: Vec<RangedNode> = Vec::new();
+
+    // we get an estimated range by searching for tokens that match the node property values
+    // and, if available, the `location` of the node itself
+    nodes.iter().enumerate().for_each(|(idx, n)| {
+        let child_token_range = child_token_ranges[idx];
 
-        used_tokens.extend(child_token_range.child_token_indices);
+        println!("node: {:#?}, child_token_range: {:?}", n, child_token_range);
 
-        // For `from`, the location of the node itself is always correct.
-        // If not available, the closest estimation is the smaller value of the start of the first direct child token,
-        // and the start of all children ranges. If neither is available, let’s panic for now.
-        // The parent location as a fallback should never be required, because any node must have either children with tokens, or a token itself.
         let child_node_ranges = ranged_nodes
             .iter()
             .filter(|x| x.inner.path.starts_with(n.path.as_str()))
             .collect::<Vec<&RangedNode>>();
-        let location = get_location(&n.node);
-        let from = if location.is_some() {
-            Some(TextSize::from(location.unwrap()))
+
+        // get `from` location
+        let node_location = match get_location(&n.node) {
+            Some(l) => Some(TextSize::from(l)),
+            None => None,
+        };
+        let start_of_all_children_ranges = if child_node_ranges.len() > 0 {
+            Some(
+                child_node_ranges
+                    .iter()
+                    .min_by_key(|n| n.range.start())
+                    .unwrap()
+                    .range
+                    .start(),
+            )
         } else {
-            let start_of_all_children_ranges = if child_node_ranges.len() > 0 {
-                Some(
-                    child_node_ranges
-                        .iter()
-                        .min_by_key(|n| n.range.start())
-                        .unwrap()
-                        .range
-                        .start(),
-                )
-            } else {
-                None
-            };
-
-            if child_token_range.range.is_some() {
-                let start_of_first_child_token = child_token_range.range.unwrap().start();
-                if start_of_all_children_ranges.is_some() {
-                    Some(min(
-                        start_of_first_child_token,
-                        start_of_all_children_ranges.unwrap(),
-                    ))
-                } else {
-                    Some(start_of_first_child_token)
-                }
-            } else if start_of_all_children_ranges.is_some() {
-                Some(start_of_all_children_ranges.unwrap())
-            } else {
-                debug!("No location or child tokens found for node {:?}", n);
-                None
-            }
+            None
         };
+        let start_of_first_child_token = match child_token_range {
+            Some(r) => Some(r.start()),
+            None => None,
+        };
+
+        let from_locations: [Option<TextSize>; 3] = [
+            node_location,
+            start_of_all_children_ranges,
+            start_of_first_child_token,
+        ];
+        let from = from_locations.iter().filter(|v| v.is_some()).min();
 
         // For `to`, it’s the larger value of the end of the last direkt child token, and the end of all children ranges.
         let end_of_all_children_ranges = if child_node_ranges.len() > 0 {
@@ -100,27 +136,18 @@ pub fn estimate_node_range(
         } else {
             None
         };
-        let to = if child_token_range.range.is_some() {
-            let end_of_last_child_token = child_token_range.range.unwrap().end();
-            if end_of_all_children_ranges.is_some() {
-                Some(max(
-                    end_of_last_child_token,
-                    end_of_all_children_ranges.unwrap(),
-                ))
-            } else {
-                Some(end_of_last_child_token)
-            }
-        } else if end_of_all_children_ranges.is_some() {
-            Some(end_of_all_children_ranges.unwrap())
-        } else {
-            debug!("No child tokens or children ranges found for node {:?}", n);
-            None
+        let end_of_last_child_token = match child_token_range {
+            Some(r) => Some(r.end()),
+            None => None,
         };
+        let to_locations: [Option<TextSize>; 2] =
+            [end_of_all_children_ranges, end_of_last_child_token];
+        let to = to_locations.iter().filter(|v| v.is_some()).max();
 
         if from.is_some() && to.is_some() {
             ranged_nodes.push(RangedNode {
                 inner: n.to_owned(),
-                range: TextRange::new(from.unwrap(), to.unwrap()),
+                range: TextRange::new(from.unwrap().unwrap(), to.unwrap().unwrap()),
             });
         }
     });
@@ -131,6 +158,29 @@ pub fn estimate_node_range(
     ranged_nodes
 }
 
+fn get_nearest_parent_start(
+    node: &Node,
+    nodes: &Vec<Node>,
+    child_token_ranges: &Vec<Option<TextRange>>,
+) -> u32 {
+    let mut path_elements = node.path.split(".").collect::<Vec<&str>>();
+    path_elements.pop();
+    while path_elements.len() > 0 {
+        let parent_path = path_elements.join(".");
+        let parent_idx = nodes.iter().position(|c| c.path == parent_path);
+        if parent_idx.is_some() {
+            if child_token_ranges[parent_idx.unwrap()].is_some() {
+                return u32::from(child_token_ranges[parent_idx.unwrap()].unwrap().start());
+            }
+        }
+
+        path_elements.pop();
+    }
+
+    // fallback to 0
+    0
+}
+
 fn get_nearest_parent_location(n: &Node, children: &Vec<Node>) -> u32 {
     // if location is set, return it
     let location = get_location(&n.node);
@@ -155,7 +205,7 @@ fn get_nearest_parent_location(n: &Node, children: &Vec<Node>) -> u32 {
     }
 
     // fallback to 0
-    return 0;
+    0
 }
 
 #[cfg(test)]
diff --git a/crates/parser/src/source_parser.rs b/crates/parser/src/source_parser.rs
index 98e4c91f..d59acc44 100644
--- a/crates/parser/src/source_parser.rs
+++ b/crates/parser/src/source_parser.rs
@@ -75,7 +75,7 @@ fn tokens(input: &str) -> Vec<Token> {
 }
 
 impl Parser {
-    fn parse_source_at(&mut self, text: &str, at_offset: Option<u32>) {
+    pub fn parse_source_at(&mut self, text: &str, at_offset: Option<u32>) {
         let offset = at_offset.unwrap_or(0);
 
         let tokens = tokens(&text);
diff --git a/crates/parser/src/statement_parser.rs b/crates/parser/src/statement_parser.rs
index 89c3c99c..46e7126d 100644
--- a/crates/parser/src/statement_parser.rs
+++ b/crates/parser/src/statement_parser.rs
@@ -357,6 +357,48 @@ mod tests {
         parser.parse_statement_at(input, None);
         let parsed = parser.finish();
 
+        assert_eq!(parsed.cst.text(), input);
+    }
+
+    #[test]
+    fn test_window_call() {
+        init();
+
+        let input =
+            "SELECT sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC);";
+
+        let mut parser = Parser::new();
+        parser.parse_statement_at(input, None);
+        let parsed = parser.finish();
+
+        assert_eq!(parsed.cst.text(), input);
+    }
+
+    #[test]
+    fn test_access_priv() {
+        init();
+
+        let input = "GRANT SELECT (col1), UPDATE (col1) ON mytable TO miriam_rw;";
+
+        let mut parser = Parser::new();
+        parser.parse_statement_at(input, None);
+        let parsed = parser.finish();
+
+        dbg!(&parsed.cst);
+
+        assert_eq!(parsed.cst.text(), input);
+    }
+
+    #[test]
+    fn test_create_policy() {
+        init();
+
+        let input = "CREATE POLICY account_managers ON accounts TO managers USING (manager = current_user);";
+
+        let mut parser = Parser::new();
+        parser.parse_statement_at(input, None);
+        let parsed = parser.finish();
+
         dbg!(&parsed.cst);
 
         assert_eq!(parsed.cst.text(), input);
diff --git a/crates/parser/tests/snapshots/statements/valid/0012.snap b/crates/parser/tests/snapshots/statements/valid/0012.snap
index f4d77ae0..ad8b591b 100644
--- a/crates/parser/tests/snapshots/statements/valid/0012.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0012.snap
@@ -2,45 +2,54 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE TABLE measurement_y2008m01 PARTITION OF measurement\n    FOR VALUES FROM ('2008-01-01') TO ('2008-02-01')\n    WITH (parallel_workers = 4)\n    TABLESPACE fasttablespace;\n"
 ---
-CreateStmt@0..175
+CreateStmt@0..174
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   Table@7..12 "TABLE"
   Whitespace@12..13 " "
-  Ident@13..33 "measurement_y2008m01"
+  RangeVar@13..33
+    Ident@13..33 "measurement_y2008m01"
   Whitespace@33..34 " "
   Partition@34..43 "PARTITION"
   Whitespace@43..44 " "
   Of@44..46 "OF"
   Whitespace@46..47 " "
-  Ident@47..58 "measurement"
+  RangeVar@47..58
+    Ident@47..58 "measurement"
   Newline@58..59 "\n"
   Whitespace@59..63 "    "
   For@63..66 "FOR"
   Whitespace@66..67 " "
   Values@67..73 "VALUES"
   Whitespace@73..74 " "
-  From@74..78 "FROM"
-  Whitespace@78..79 " "
-  Ascii40@79..80 "("
-  Sconst@80..92 "'2008-01-01'"
-  Ascii41@92..93 ")"
-  Whitespace@93..94 " "
-  To@94..96 "TO"
-  Whitespace@96..97 " "
-  Ascii40@97..98 "("
-  Sconst@98..110 "'2008-02-01'"
-  Ascii41@110..111 ")"
+  PartitionBoundSpec@74..111
+    From@74..78 "FROM"
+    Whitespace@78..79 " "
+    Ascii40@79..80 "("
+    AConst@80..92
+      String@80..92
+        Sconst@80..92 "'2008-01-01'"
+    Ascii41@92..93 ")"
+    Whitespace@93..94 " "
+    To@94..96 "TO"
+    Whitespace@96..97 " "
+    Ascii40@97..98 "("
+    AConst@98..110
+      String@98..110
+        Sconst@98..110 "'2008-02-01'"
+    Ascii41@110..111 ")"
   Newline@111..112 "\n"
   Whitespace@112..116 "    "
   With@116..120 "WITH"
   Whitespace@120..121 " "
   Ascii40@121..122 "("
-  Ident@122..138 "parallel_workers"
-  Whitespace@138..139 " "
-  Ascii61@139..140 "="
-  Whitespace@140..141 " "
-  Iconst@141..142 "4"
+  DefElem@122..142
+    Ident@122..138 "parallel_workers"
+    Whitespace@138..139 " "
+    Ascii61@139..140 "="
+    Whitespace@140..141 " "
+    Integer@141..142
+      Iconst@141..142 "4"
   Ascii41@142..143 ")"
   Newline@143..144 "\n"
   Whitespace@144..148 "    "
@@ -48,5 +57,4 @@ CreateStmt@0..175
   Whitespace@158..159 " "
   Ident@159..173 "fasttablespace"
   Ascii59@173..174 ";"
-  Newline@174..175 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0012.snap.new b/crates/parser/tests/snapshots/statements/valid/0012.snap.new
deleted file mode 100644
index f956dd31..00000000
--- a/crates/parser/tests/snapshots/statements/valid/0012.snap.new
+++ /dev/null
@@ -1,61 +0,0 @@
----
-source: crates/parser/tests/statement_parser_test.rs
-assertion_line: 39
-description: "CREATE TABLE measurement_y2008m01 PARTITION OF measurement\n    FOR VALUES FROM ('2008-01-01') TO ('2008-02-01')\n    WITH (parallel_workers = 4)\n    TABLESPACE fasttablespace;\n"
----
-CreateStmt@0..174
-  Create@0..6 "CREATE"
-  Whitespace@6..7 " "
-  Table@7..12 "TABLE"
-  Whitespace@12..13 " "
-  RangeVar@13..33
-    Ident@13..33 "measurement_y2008m01"
-  Whitespace@33..34 " "
-  Partition@34..43 "PARTITION"
-  Whitespace@43..44 " "
-  Of@44..46 "OF"
-  Whitespace@46..47 " "
-  RangeVar@47..58
-    Ident@47..58 "measurement"
-  Newline@58..59 "\n"
-  Whitespace@59..63 "    "
-  For@63..66 "FOR"
-  Whitespace@66..67 " "
-  Values@67..73 "VALUES"
-  Whitespace@73..74 " "
-  PartitionBoundSpec@74..111
-    From@74..78 "FROM"
-    Whitespace@78..79 " "
-    Ascii40@79..80 "("
-    AConst@80..92
-      String@80..92
-        Sconst@80..92 "'2008-01-01'"
-    Ascii41@92..93 ")"
-    Whitespace@93..94 " "
-    To@94..96 "TO"
-    Whitespace@96..97 " "
-    Ascii40@97..98 "("
-    AConst@98..110
-      String@98..110
-        Sconst@98..110 "'2008-02-01'"
-    Ascii41@110..111 ")"
-  Newline@111..112 "\n"
-  Whitespace@112..116 "    "
-  With@116..120 "WITH"
-  Whitespace@120..121 " "
-  Ascii40@121..122 "("
-  DefElem@122..142
-    Ident@122..138 "parallel_workers"
-    Whitespace@138..139 " "
-    Ascii61@139..140 "="
-    Whitespace@140..141 " "
-    Integer@141..142
-      Iconst@141..142 "4"
-  Ascii41@142..143 ")"
-  Newline@143..144 "\n"
-  Whitespace@144..148 "    "
-  Tablespace@148..158 "TABLESPACE"
-  Whitespace@158..159 " "
-  Ident@159..173 "fasttablespace"
-  Ascii59@173..174 ";"
-
diff --git a/crates/parser/tests/snapshots/statements/valid/0013.snap b/crates/parser/tests/snapshots/statements/valid/0013.snap
index 643f1e95..031b67df 100644
--- a/crates/parser/tests/snapshots/statements/valid/0013.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0013.snap
@@ -2,43 +2,63 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "UPDATE weather\n    SET temp_hi = temp_hi - 2,  temp_lo = temp_lo - 2\n    WHERE date > '1994-11-28';\n"
 ---
-UpdateStmt@0..100
+UpdateStmt@0..99
   Update@0..6 "UPDATE"
   Whitespace@6..7 " "
-  Ident@7..14 "weather"
+  RangeVar@7..14
+    Ident@7..14 "weather"
   Newline@14..15 "\n"
   Whitespace@15..19 "    "
   Set@19..22 "SET"
   Whitespace@22..23 " "
-  Ident@23..30 "temp_hi"
-  Whitespace@30..31 " "
-  Ascii61@31..32 "="
-  Whitespace@32..33 " "
-  Ident@33..40 "temp_hi"
-  Whitespace@40..41 " "
-  Ascii45@41..42 "-"
-  Whitespace@42..43 " "
-  Iconst@43..44 "2"
+  ResTarget@23..44
+    Ident@23..30 "temp_hi"
+    Whitespace@30..31 " "
+    Ascii61@31..32 "="
+    Whitespace@32..33 " "
+    AExpr@33..44
+      ColumnRef@33..40
+        String@33..40
+          Ident@33..40 "temp_hi"
+      Whitespace@40..41 " "
+      String@41..42
+        Ascii45@41..42 "-"
+      Whitespace@42..43 " "
+      AConst@43..44
+        Integer@43..44
+          Iconst@43..44 "2"
   Ascii44@44..45 ","
   Whitespace@45..47 "  "
-  Ident@47..54 "temp_lo"
-  Whitespace@54..55 " "
-  Ascii61@55..56 "="
-  Whitespace@56..57 " "
-  Ident@57..64 "temp_lo"
-  Whitespace@64..65 " "
-  Ascii45@65..66 "-"
-  Whitespace@66..67 " "
-  Iconst@67..68 "2"
+  ResTarget@47..68
+    Ident@47..54 "temp_lo"
+    Whitespace@54..55 " "
+    Ascii61@55..56 "="
+    Whitespace@56..57 " "
+    AExpr@57..68
+      ColumnRef@57..64
+        String@57..64
+          Ident@57..64 "temp_lo"
+      Whitespace@64..65 " "
+      String@65..66
+        Ascii45@65..66 "-"
+      Whitespace@66..67 " "
+      AConst@67..68
+        Integer@67..68
+          Iconst@67..68 "2"
   Newline@68..69 "\n"
   Whitespace@69..73 "    "
   Where@73..78 "WHERE"
   Whitespace@78..79 " "
-  Ident@79..83 "date"
-  Whitespace@83..84 " "
-  Ascii62@84..85 ">"
-  Whitespace@85..86 " "
-  Sconst@86..98 "'1994-11-28'"
+  AExpr@79..98
+    ColumnRef@79..83
+      String@79..83
+        Ident@79..83 "date"
+    Whitespace@83..84 " "
+    String@84..85
+      Ascii62@84..85 ">"
+    Whitespace@85..86 " "
+    AConst@86..98
+      String@86..98
+        Sconst@86..98 "'1994-11-28'"
   Ascii59@98..99 ";"
-  Newline@99..100 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0013.snap.new b/crates/parser/tests/snapshots/statements/valid/0013.snap.new
deleted file mode 100644
index 2d1f7551..00000000
--- a/crates/parser/tests/snapshots/statements/valid/0013.snap.new
+++ /dev/null
@@ -1,65 +0,0 @@
----
-source: crates/parser/tests/statement_parser_test.rs
-assertion_line: 39
-description: "UPDATE weather\n    SET temp_hi = temp_hi - 2,  temp_lo = temp_lo - 2\n    WHERE date > '1994-11-28';\n"
----
-UpdateStmt@0..99
-  Update@0..6 "UPDATE"
-  Whitespace@6..7 " "
-  RangeVar@7..14
-    Ident@7..14 "weather"
-  Newline@14..15 "\n"
-  Whitespace@15..19 "    "
-  Set@19..22 "SET"
-  Whitespace@22..23 " "
-  ResTarget@23..44
-    Ident@23..30 "temp_hi"
-    Whitespace@30..31 " "
-    Ascii61@31..32 "="
-    Whitespace@32..33 " "
-    AExpr@33..44
-      ColumnRef@33..40
-        String@33..40
-          Ident@33..40 "temp_hi"
-      Whitespace@40..41 " "
-      String@41..42
-        Ascii45@41..42 "-"
-      Whitespace@42..43 " "
-      AConst@43..44
-        Integer@43..44
-          Iconst@43..44 "2"
-  Ascii44@44..45 ","
-  Whitespace@45..47 "  "
-  ResTarget@47..68
-    Ident@47..54 "temp_lo"
-    Whitespace@54..55 " "
-    Ascii61@55..56 "="
-    Whitespace@56..57 " "
-    AExpr@57..68
-      ColumnRef@57..64
-        String@57..64
-          Ident@57..64 "temp_lo"
-      Whitespace@64..65 " "
-      String@65..66
-        Ascii45@65..66 "-"
-      Whitespace@66..67 " "
-      AConst@67..68
-        Integer@67..68
-          Iconst@67..68 "2"
-  Newline@68..69 "\n"
-  Whitespace@69..73 "    "
-  Where@73..78 "WHERE"
-  Whitespace@78..79 " "
-  AExpr@79..98
-    ColumnRef@79..83
-      String@79..83
-        Ident@79..83 "date"
-    Whitespace@83..84 " "
-    String@84..85
-      Ascii62@84..85 ">"
-    Whitespace@85..86 " "
-    AConst@86..98
-      String@86..98
-        Sconst@86..98 "'1994-11-28'"
-  Ascii59@98..99 ";"
-
diff --git a/crates/parser/tests/snapshots/statements/valid/0014.snap b/crates/parser/tests/snapshots/statements/valid/0014.snap
index 490b0142..8d760544 100644
--- a/crates/parser/tests/snapshots/statements/valid/0014.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0014.snap
@@ -5,8 +5,8 @@ description: "SELECT sum(salary) OVER w, avg(salary) OVER w\n  FROM empsalary\n
 SelectStmt@0..121
   Select@0..6 "SELECT"
   Whitespace@6..7 " "
-  ResTarget@7..120
-    FuncCall@7..120
+  ResTarget@7..25
+    FuncCall@7..25
       String@7..10
         Ident@7..10 "sum"
       Ascii40@10..11 "("
@@ -15,60 +15,60 @@ SelectStmt@0..121
           Ident@11..17 "salary"
       Ascii41@17..18 ")"
       Whitespace@18..19 " "
-      Over@19..23 "OVER"
-      Whitespace@23..24 " "
-      WindowDef@24..120
+      WindowDef@19..25
+        Over@19..23 "OVER"
+        Whitespace@23..24 " "
         Ident@24..25 "w"
-        Ascii44@25..26 ","
-        Whitespace@26..27 " "
-        ResTarget@27..120
-          FuncCall@27..120
-            String@27..30
-              Ident@27..30 "avg"
-            Ascii40@30..31 "("
-            ColumnRef@31..37
-              String@31..37
-                Ident@31..37 "salary"
-            Ascii41@37..38 ")"
-            Whitespace@38..39 " "
-            Over@39..43 "OVER"
-            Whitespace@43..44 " "
-            WindowDef@44..120
-              Ident@44..45 "w"
-              Newline@45..46 "\n"
-              Whitespace@46..48 "  "
-              From@48..52 "FROM"
-              Whitespace@52..53 " "
-              RangeVar@53..62
-                Ident@53..62 "empsalary"
-              Newline@62..63 "\n"
-              Whitespace@63..65 "  "
-              Window@65..71 "WINDOW"
-              Whitespace@71..72 " "
-              Ident@72..73 "w"
-              Whitespace@73..74 " "
-              As@74..76 "AS"
-              Whitespace@76..77 " "
-              WindowDef@77..120
-                Ascii40@77..78 "("
-                Partition@78..87 "PARTITION"
-                Whitespace@87..88 " "
-                By@88..90 "BY"
-                Whitespace@90..91 " "
-                ColumnRef@91..98
-                  String@91..98
-                    Ident@91..98 "depname"
-                Whitespace@98..99 " "
-                SortBy@99..119
-                  Order@99..104 "ORDER"
-                  Whitespace@104..105 " "
-                  By@105..107 "BY"
-                  Whitespace@107..108 " "
-                  ColumnRef@108..114
-                    String@108..114
-                      Ident@108..114 "salary"
-                  Whitespace@114..115 " "
-                  Desc@115..119 "DESC"
-                Ascii41@119..120 ")"
+  Ascii44@25..26 ","
+  Whitespace@26..27 " "
+  ResTarget@27..45
+    FuncCall@27..45
+      String@27..30
+        Ident@27..30 "avg"
+      Ascii40@30..31 "("
+      ColumnRef@31..37
+        String@31..37
+          Ident@31..37 "salary"
+      Ascii41@37..38 ")"
+      Whitespace@38..39 " "
+      WindowDef@39..45
+        Over@39..43 "OVER"
+        Whitespace@43..44 " "
+        Ident@44..45 "w"
+  Newline@45..46 "\n"
+  Whitespace@46..48 "  "
+  From@48..52 "FROM"
+  Whitespace@52..53 " "
+  RangeVar@53..62
+    Ident@53..62 "empsalary"
+  Newline@62..63 "\n"
+  Whitespace@63..65 "  "
+  WindowDef@65..120
+    Window@65..71 "WINDOW"
+    Whitespace@71..72 " "
+    Ident@72..73 "w"
+    Whitespace@73..74 " "
+    As@74..76 "AS"
+    Whitespace@76..77 " "
+    Ascii40@77..78 "("
+    Partition@78..87 "PARTITION"
+    Whitespace@87..88 " "
+    By@88..90 "BY"
+    Whitespace@90..91 " "
+    ColumnRef@91..98
+      String@91..98
+        Ident@91..98 "depname"
+    Whitespace@98..99 " "
+    SortBy@99..119
+      Order@99..104 "ORDER"
+      Whitespace@104..105 " "
+      By@105..107 "BY"
+      Whitespace@107..108 " "
+      ColumnRef@108..114
+        String@108..114
+          Ident@108..114 "salary"
+      Whitespace@114..115 " "
+      Desc@115..119 "DESC"
+    Ascii41@119..120 ")"
   Ascii59@120..121 ";"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0015.snap b/crates/parser/tests/snapshots/statements/valid/0015.snap
index 0d9223c5..4f6bf7df 100644
--- a/crates/parser/tests/snapshots/statements/valid/0015.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0015.snap
@@ -2,57 +2,78 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "SELECT\n    count(*) AS unfiltered,\n    count(*) FILTER (WHERE i < 5) AS filtered\nFROM generate_series(1,10) AS s(i);\n"
 ---
-SelectStmt@0..117
+SelectStmt@0..116
   Select@0..6 "SELECT"
   Newline@6..7 "\n"
   Whitespace@7..11 "    "
-  Ident@11..16 "count"
-  Ascii40@16..17 "("
-  Ascii42@17..18 "*"
-  Ascii41@18..19 ")"
-  Whitespace@19..20 " "
-  As@20..22 "AS"
-  Whitespace@22..23 " "
-  Ident@23..33 "unfiltered"
+  ResTarget@11..33
+    FuncCall@11..16
+      String@11..16
+        Ident@11..16 "count"
+    Ascii40@16..17 "("
+    Ascii42@17..18 "*"
+    Ascii41@18..19 ")"
+    Whitespace@19..20 " "
+    As@20..22 "AS"
+    Whitespace@22..23 " "
+    Ident@23..33 "unfiltered"
   Ascii44@33..34 ","
   Newline@34..35 "\n"
   Whitespace@35..39 "    "
-  Ident@39..44 "count"
-  Ascii40@44..45 "("
-  Ascii42@45..46 "*"
-  Ascii41@46..47 ")"
-  Whitespace@47..48 " "
-  Filter@48..54 "FILTER"
-  Whitespace@54..55 " "
-  Ascii40@55..56 "("
-  Where@56..61 "WHERE"
-  Whitespace@61..62 " "
-  Ident@62..63 "i"
-  Whitespace@63..64 " "
-  Ascii60@64..65 "<"
-  Whitespace@65..66 " "
-  Iconst@66..67 "5"
-  Ascii41@67..68 ")"
-  Whitespace@68..69 " "
-  As@69..71 "AS"
-  Whitespace@71..72 " "
-  Ident@72..80 "filtered"
+  ResTarget@39..80
+    FuncCall@39..68
+      String@39..44
+        Ident@39..44 "count"
+      Ascii40@44..45 "("
+      Ascii42@45..46 "*"
+      Ascii41@46..47 ")"
+      Whitespace@47..48 " "
+      Filter@48..54 "FILTER"
+      Whitespace@54..55 " "
+      Ascii40@55..56 "("
+      Where@56..61 "WHERE"
+      Whitespace@61..62 " "
+      AExpr@62..67
+        ColumnRef@62..63
+          String@62..63
+            Ident@62..63 "i"
+        Whitespace@63..64 " "
+        String@64..65
+          Ascii60@64..65 "<"
+        Whitespace@65..66 " "
+        AConst@66..67
+          Integer@66..67
+            Iconst@66..67 "5"
+      Ascii41@67..68 ")"
+    Whitespace@68..69 " "
+    As@69..71 "AS"
+    Whitespace@71..72 " "
+    Ident@72..80 "filtered"
   Newline@80..81 "\n"
   From@81..85 "FROM"
   Whitespace@85..86 " "
-  Ident@86..101 "generate_series"
-  Ascii40@101..102 "("
-  Iconst@102..103 "1"
-  Ascii44@103..104 ","
-  Iconst@104..106 "10"
-  Ascii41@106..107 ")"
-  Whitespace@107..108 " "
-  As@108..110 "AS"
-  Whitespace@110..111 " "
-  Ident@111..112 "s"
-  Ascii40@112..113 "("
-  Ident@113..114 "i"
-  Ascii41@114..115 ")"
+  RangeFunction@86..115
+    List@86..107
+      FuncCall@86..107
+        String@86..101
+          Ident@86..101 "generate_series"
+        Ascii40@101..102 "("
+        AConst@102..103
+          Integer@102..103
+            Iconst@102..103 "1"
+        Ascii44@103..104 ","
+        AConst@104..106
+          Integer@104..106
+            Iconst@104..106 "10"
+        Ascii41@106..107 ")"
+    Whitespace@107..108 " "
+    As@108..110 "AS"
+    Whitespace@110..111 " "
+    Alias@111..115
+      Ident@111..112 "s"
+      Ascii40@112..113 "("
+      String@113..114
+        Ident@113..114 "i"
+      Ascii41@114..115 ")"
   Ascii59@115..116 ";"
-  Newline@116..117 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0015.snap.new b/crates/parser/tests/snapshots/statements/valid/0015.snap.new
deleted file mode 100644
index 89d0aa08..00000000
--- a/crates/parser/tests/snapshots/statements/valid/0015.snap.new
+++ /dev/null
@@ -1,80 +0,0 @@
----
-source: crates/parser/tests/statement_parser_test.rs
-assertion_line: 39
-description: "SELECT\n    count(*) AS unfiltered,\n    count(*) FILTER (WHERE i < 5) AS filtered\nFROM generate_series(1,10) AS s(i);\n"
----
-SelectStmt@0..116
-  Select@0..6 "SELECT"
-  Newline@6..7 "\n"
-  Whitespace@7..11 "    "
-  ResTarget@11..33
-    FuncCall@11..16
-      String@11..16
-        Ident@11..16 "count"
-    Ascii40@16..17 "("
-    Ascii42@17..18 "*"
-    Ascii41@18..19 ")"
-    Whitespace@19..20 " "
-    As@20..22 "AS"
-    Whitespace@22..23 " "
-    Ident@23..33 "unfiltered"
-  Ascii44@33..34 ","
-  Newline@34..35 "\n"
-  Whitespace@35..39 "    "
-  ResTarget@39..112
-    FuncCall@39..112
-      String@39..44
-        Ident@39..44 "count"
-      Ascii40@44..45 "("
-      Ascii42@45..46 "*"
-      Ascii41@46..47 ")"
-      Whitespace@47..48 " "
-      Filter@48..54 "FILTER"
-      Whitespace@54..55 " "
-      Ascii40@55..56 "("
-      Where@56..61 "WHERE"
-      Whitespace@61..62 " "
-      RangeFunction@62..112
-        Alias@62..112
-          String@62..67
-            AExpr@62..67
-              ColumnRef@62..63
-                String@62..63
-                  Ident@62..63 "i"
-              Whitespace@63..64 " "
-              String@64..65
-                Ascii60@64..65 "<"
-              Whitespace@65..66 " "
-              AConst@66..67
-                Integer@66..67
-                  Iconst@66..67 "5"
-          Ascii41@67..68 ")"
-          Whitespace@68..69 " "
-          As@69..71 "AS"
-          Whitespace@71..72 " "
-          Ident@72..80 "filtered"
-          Newline@80..81 "\n"
-          From@81..85 "FROM"
-          Whitespace@85..86 " "
-          List@86..107
-            FuncCall@86..107
-              String@86..101
-                Ident@86..101 "generate_series"
-              Ascii40@101..102 "("
-              AConst@102..103
-                Integer@102..103
-                  Iconst@102..103 "1"
-              Ascii44@103..104 ","
-              AConst@104..106
-                Integer@104..106
-                  Iconst@104..106 "10"
-              Ascii41@106..107 ")"
-          Whitespace@107..108 " "
-          As@108..110 "AS"
-          Whitespace@110..111 " "
-          Ident@111..112 "s"
-  Ascii40@112..113 "("
-  Ident@113..114 "i"
-  Ascii41@114..115 ")"
-  Ascii59@115..116 ";"
-
diff --git a/crates/parser/tests/snapshots/statements/valid/0016.snap b/crates/parser/tests/snapshots/statements/valid/0016.snap
index 6306aa10..a93b47ec 100644
--- a/crates/parser/tests/snapshots/statements/valid/0016.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0016.snap
@@ -2,26 +2,36 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "SELECT * FROM tbl WHERE a COLLATE \"C\" > 'foo';\n"
 ---
-SelectStmt@0..47
+SelectStmt@0..46
   Select@0..6 "SELECT"
   Whitespace@6..7 " "
-  Ascii42@7..8 "*"
+  ResTarget@7..8
+    ColumnRef@7..8
+      AStar@7..8
+        Ascii42@7..8 "*"
   Whitespace@8..9 " "
   From@9..13 "FROM"
   Whitespace@13..14 " "
-  Ident@14..17 "tbl"
+  RangeVar@14..17
+    Ident@14..17 "tbl"
   Whitespace@17..18 " "
   Where@18..23 "WHERE"
   Whitespace@23..24 " "
-  Ident@24..25 "a"
-  Whitespace@25..26 " "
-  Collate@26..33 "COLLATE"
-  Whitespace@33..34 " "
-  Ident@34..37 "\"C\""
-  Whitespace@37..38 " "
-  Ascii62@38..39 ">"
-  Whitespace@39..40 " "
-  Sconst@40..45 "'foo'"
+  AExpr@24..45
+    CollateClause@24..25
+      ColumnRef@24..25
+        String@24..25
+          Ident@24..25 "a"
+    Whitespace@25..26 " "
+    Collate@26..33 "COLLATE"
+    Whitespace@33..34 " "
+    Ident@34..37 "\"C\""
+    Whitespace@37..38 " "
+    String@38..39
+      Ascii62@38..39 ">"
+    Whitespace@39..40 " "
+    AConst@40..45
+      String@40..45
+        Sconst@40..45 "'foo'"
   Ascii59@45..46 ";"
-  Newline@46..47 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0017.snap b/crates/parser/tests/snapshots/statements/valid/0017.snap
index 6c3f9d52..21dc6c7d 100644
--- a/crates/parser/tests/snapshots/statements/valid/0017.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0017.snap
@@ -2,41 +2,61 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "SELECT name, (SELECT max(pop) FROM cities WHERE cities.state = states.name)\n    FROM states;\n"
 ---
-SelectStmt@0..93
+SelectStmt@0..92
   Select@0..6 "SELECT"
   Whitespace@6..7 " "
-  NameP@7..11 "name"
+  ResTarget@7..11
+    ColumnRef@7..11
+      String@7..11
+        NameP@7..11 "name"
   Ascii44@11..12 ","
   Whitespace@12..13 " "
-  Ascii40@13..14 "("
-  Select@14..20 "SELECT"
-  Whitespace@20..21 " "
-  Ident@21..24 "max"
-  Ascii40@24..25 "("
-  Ident@25..28 "pop"
-  Ascii41@28..29 ")"
-  Whitespace@29..30 " "
-  From@30..34 "FROM"
-  Whitespace@34..35 " "
-  Ident@35..41 "cities"
-  Whitespace@41..42 " "
-  Where@42..47 "WHERE"
-  Whitespace@47..48 " "
-  Ident@48..54 "cities"
-  Ascii46@54..55 "."
-  Ident@55..60 "state"
-  Whitespace@60..61 " "
-  Ascii61@61..62 "="
-  Whitespace@62..63 " "
-  Ident@63..69 "states"
-  Ascii46@69..70 "."
-  NameP@70..74 "name"
-  Ascii41@74..75 ")"
+  ResTarget@13..75
+    SubLink@13..75
+      Ascii40@13..14 "("
+      SelectStmt@14..74
+        Select@14..20 "SELECT"
+        Whitespace@20..21 " "
+        ResTarget@21..29
+          FuncCall@21..29
+            String@21..24
+              Ident@21..24 "max"
+            Ascii40@24..25 "("
+            ColumnRef@25..28
+              String@25..28
+                Ident@25..28 "pop"
+            Ascii41@28..29 ")"
+        Whitespace@29..30 " "
+        From@30..34 "FROM"
+        Whitespace@34..35 " "
+        RangeVar@35..41
+          Ident@35..41 "cities"
+        Whitespace@41..42 " "
+        Where@42..47 "WHERE"
+        Whitespace@47..48 " "
+        AExpr@48..74
+          ColumnRef@48..60
+            String@48..54
+              Ident@48..54 "cities"
+            Ascii46@54..55 "."
+            String@55..60
+              Ident@55..60 "state"
+          Whitespace@60..61 " "
+          String@61..62
+            Ascii61@61..62 "="
+          Whitespace@62..63 " "
+          ColumnRef@63..74
+            String@63..69
+              Ident@63..69 "states"
+            Ascii46@69..70 "."
+            String@70..74
+              NameP@70..74 "name"
+      Ascii41@74..75 ")"
   Newline@75..76 "\n"
   Whitespace@76..80 "    "
   From@80..84 "FROM"
   Whitespace@84..85 " "
-  Ident@85..91 "states"
+  RangeVar@85..91
+    Ident@85..91 "states"
   Ascii59@91..92 ";"
-  Newline@92..93 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0018.snap b/crates/parser/tests/snapshots/statements/valid/0018.snap
index a93ef99c..d8152852 100644
--- a/crates/parser/tests/snapshots/statements/valid/0018.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0018.snap
@@ -2,21 +2,31 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "SELECT ARRAY[1,2,22.7]::integer[];\n"
 ---
-SelectStmt@0..35
+SelectStmt@0..34
   Select@0..6 "SELECT"
   Whitespace@6..7 " "
-  Array@7..12 "ARRAY"
-  Ascii91@12..13 "["
-  Iconst@13..14 "1"
-  Ascii44@14..15 ","
-  Iconst@15..16 "2"
-  Ascii44@16..17 ","
-  Fconst@17..21 "22.7"
-  Ascii93@21..22 "]"
-  Typecast@22..24 "::"
-  Integer@24..31 "integer"
+  ResTarget@7..31
+    TypeCast@7..31
+      AArrayExpr@7..21
+        Array@7..12 "ARRAY"
+        Ascii91@12..13 "["
+        AConst@13..14
+          Integer@13..14
+            Iconst@13..14 "1"
+        Ascii44@14..15 ","
+        AConst@15..16
+          Integer@15..16
+            Iconst@15..16 "2"
+        Ascii44@16..17 ","
+        AConst@17..21
+          Float@17..21
+            Fconst@17..21 "22.7"
+      Ascii93@21..22 "]"
+      Typecast@22..24 "::"
+      TypeName@24..31
+        String@24..31
+          Integer@24..31 "integer"
   Ascii91@31..32 "["
   Ascii93@32..33 "]"
   Ascii59@33..34 ";"
-  Newline@34..35 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0019.snap b/crates/parser/tests/snapshots/statements/valid/0019.snap
index 97f59c8f..ac87dd53 100644
--- a/crates/parser/tests/snapshots/statements/valid/0019.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0019.snap
@@ -2,33 +2,52 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "SELECT CASE WHEN min(employees) > 0\n            THEN avg(expenses / employees)\n       END\n    FROM departments;\n"
 ---
-SelectStmt@0..112
+SelectStmt@0..111
   Select@0..6 "SELECT"
   Whitespace@6..7 " "
-  Case@7..11 "CASE"
-  Whitespace@11..12 " "
-  When@12..16 "WHEN"
-  Whitespace@16..17 " "
-  Ident@17..20 "min"
-  Ascii40@20..21 "("
-  Ident@21..30 "employees"
-  Ascii41@30..31 ")"
-  Whitespace@31..32 " "
-  Ascii62@32..33 ">"
-  Whitespace@33..34 " "
-  Iconst@34..35 "0"
-  Newline@35..36 "\n"
-  Whitespace@36..48 "            "
-  Then@48..52 "THEN"
-  Whitespace@52..53 " "
-  Ident@53..56 "avg"
-  Ascii40@56..57 "("
-  Ident@57..65 "expenses"
-  Whitespace@65..66 " "
-  Ascii47@66..67 "/"
-  Whitespace@67..68 " "
-  Ident@68..77 "employees"
-  Ascii41@77..78 ")"
+  ResTarget@7..78
+    CaseExpr@7..78
+      Case@7..11 "CASE"
+      Whitespace@11..12 " "
+      CaseWhen@12..78
+        When@12..16 "WHEN"
+        Whitespace@16..17 " "
+        AExpr@17..35
+          FuncCall@17..31
+            String@17..20
+              Ident@17..20 "min"
+            Ascii40@20..21 "("
+            ColumnRef@21..30
+              String@21..30
+                Ident@21..30 "employees"
+            Ascii41@30..31 ")"
+          Whitespace@31..32 " "
+          String@32..33
+            Ascii62@32..33 ">"
+          Whitespace@33..34 " "
+          AConst@34..35
+            Integer@34..35
+              Iconst@34..35 "0"
+        Newline@35..36 "\n"
+        Whitespace@36..48 "            "
+        Then@48..52 "THEN"
+        Whitespace@52..53 " "
+        FuncCall@53..78
+          String@53..56
+            Ident@53..56 "avg"
+          Ascii40@56..57 "("
+          AExpr@57..77
+            ColumnRef@57..65
+              String@57..65
+                Ident@57..65 "expenses"
+            Whitespace@65..66 " "
+            String@66..67
+              Ascii47@66..67 "/"
+            Whitespace@67..68 " "
+            ColumnRef@68..77
+              String@68..77
+                Ident@68..77 "employees"
+          Ascii41@77..78 ")"
   Newline@78..79 "\n"
   Whitespace@79..86 "       "
   EndP@86..89 "END"
@@ -36,7 +55,7 @@ SelectStmt@0..112
   Whitespace@90..94 "    "
   From@94..98 "FROM"
   Whitespace@98..99 " "
-  Ident@99..110 "departments"
+  RangeVar@99..110
+    Ident@99..110 "departments"
   Ascii59@110..111 ";"
-  Newline@111..112 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0020.snap b/crates/parser/tests/snapshots/statements/valid/0020.snap
index f84f0ed9..6a404472 100644
--- a/crates/parser/tests/snapshots/statements/valid/0020.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0020.snap
@@ -2,47 +2,66 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE FUNCTION concat_lower_or_upper(a text, b text, uppercase boolean DEFAULT false)\nRETURNS text\nAS\n$$\n SELECT CASE\n        WHEN $3 THEN UPPER($1 || ' ' || $2)\n        ELSE LOWER($1 || ' ' || $2)\n        END;\n$$\nLANGUAGE SQL IMMUTABLE STRICT;\n"
 ---
-CreateFunctionStmt@0..246
+CreateFunctionStmt@0..245
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   Function@7..15 "FUNCTION"
   Whitespace@15..16 " "
-  Ident@16..37 "concat_lower_or_upper"
+  String@16..37
+    Ident@16..37 "concat_lower_or_upper"
   Ascii40@37..38 "("
-  Ident@38..39 "a"
-  Whitespace@39..40 " "
-  TextP@40..44 "text"
+  FunctionParameter@38..44
+    Ident@38..39 "a"
+    Whitespace@39..40 " "
+    TypeName@40..44
+      String@40..44
+        TextP@40..44 "text"
   Ascii44@44..45 ","
   Whitespace@45..46 " "
-  Ident@46..47 "b"
-  Whitespace@47..48 " "
-  TextP@48..52 "text"
+  FunctionParameter@46..52
+    Ident@46..47 "b"
+    Whitespace@47..48 " "
+    TypeName@48..52
+      String@48..52
+        TextP@48..52 "text"
   Ascii44@52..53 ","
   Whitespace@53..54 " "
-  Ident@54..63 "uppercase"
-  Whitespace@63..64 " "
-  BooleanP@64..71 "boolean"
-  Whitespace@71..72 " "
-  Default@72..79 "DEFAULT"
-  Whitespace@79..80 " "
-  FalseP@80..85 "false"
+  FunctionParameter@54..85
+    Ident@54..63 "uppercase"
+    Whitespace@63..64 " "
+    BooleanP@64..71 "boolean"
+    Whitespace@71..72 " "
+    Default@72..79 "DEFAULT"
+    Whitespace@79..80 " "
+    AConst@80..85
+      Boolean@80..85
+        FalseP@80..85 "false"
   Ascii41@85..86 ")"
   Newline@86..87 "\n"
   Returns@87..94 "RETURNS"
   Whitespace@94..95 " "
-  TextP@95..99 "text"
+  TypeName@95..99
+    String@95..99
+      TextP@95..99 "text"
   Newline@99..100 "\n"
-  As@100..102 "AS"
-  Newline@102..103 "\n"
-  Sconst@103..214 "$$\n SELECT CASE\n      ..."
+  DefElem@100..214
+    As@100..102 "AS"
+    Newline@102..103 "\n"
+    List@103..214
+      String@103..214
+        Sconst@103..214 "$$\n SELECT CASE\n      ..."
   Newline@214..215 "\n"
-  Language@215..223 "LANGUAGE"
-  Whitespace@223..224 " "
-  SqlP@224..227 "SQL"
+  DefElem@215..227
+    Language@215..223 "LANGUAGE"
+    Whitespace@223..224 " "
+    String@224..227
+      SqlP@224..227 "SQL"
   Whitespace@227..228 " "
-  Immutable@228..237 "IMMUTABLE"
+  DefElem@228..237
+    String@228..237
+      Immutable@228..237 "IMMUTABLE"
   Whitespace@237..238 " "
-  StrictP@238..244 "STRICT"
+  DefElem@238..244
+    StrictP@238..244 "STRICT"
   Ascii59@244..245 ";"
-  Newline@245..246 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0021.snap b/crates/parser/tests/snapshots/statements/valid/0021.snap
index ec8ea421..805cf921 100644
--- a/crates/parser/tests/snapshots/statements/valid/0021.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0021.snap
@@ -2,31 +2,42 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "SELECT concat_lower_or_upper(a => 'Hello', b => 'World', uppercase => true);\n"
 ---
-SelectStmt@0..77
+SelectStmt@0..76
   Select@0..6 "SELECT"
   Whitespace@6..7 " "
-  Ident@7..28 "concat_lower_or_upper"
-  Ascii40@28..29 "("
-  Ident@29..30 "a"
-  Whitespace@30..31 " "
-  EqualsGreater@31..33 "=>"
-  Whitespace@33..34 " "
-  Sconst@34..41 "'Hello'"
-  Ascii44@41..42 ","
-  Whitespace@42..43 " "
-  Ident@43..44 "b"
-  Whitespace@44..45 " "
-  EqualsGreater@45..47 "=>"
-  Whitespace@47..48 " "
-  Sconst@48..55 "'World'"
-  Ascii44@55..56 ","
-  Whitespace@56..57 " "
-  Ident@57..66 "uppercase"
-  Whitespace@66..67 " "
-  EqualsGreater@67..69 "=>"
-  Whitespace@69..70 " "
-  TrueP@70..74 "true"
-  Ascii41@74..75 ")"
+  ResTarget@7..75
+    FuncCall@7..75
+      String@7..28
+        Ident@7..28 "concat_lower_or_upper"
+      Ascii40@28..29 "("
+      NamedArgExpr@29..41
+        Ident@29..30 "a"
+        Whitespace@30..31 " "
+        EqualsGreater@31..33 "=>"
+        Whitespace@33..34 " "
+        AConst@34..41
+          String@34..41
+            Sconst@34..41 "'Hello'"
+      Ascii44@41..42 ","
+      Whitespace@42..43 " "
+      NamedArgExpr@43..55
+        Ident@43..44 "b"
+        Whitespace@44..45 " "
+        EqualsGreater@45..47 "=>"
+        Whitespace@47..48 " "
+        AConst@48..55
+          String@48..55
+            Sconst@48..55 "'World'"
+      Ascii44@55..56 ","
+      Whitespace@56..57 " "
+      NamedArgExpr@57..74
+        Ident@57..66 "uppercase"
+        Whitespace@66..67 " "
+        EqualsGreater@67..69 "=>"
+        Whitespace@69..70 " "
+        AConst@70..74
+          Boolean@70..74
+            TrueP@70..74 "true"
+      Ascii41@74..75 ")"
   Ascii59@75..76 ";"
-  Newline@76..77 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0022.snap b/crates/parser/tests/snapshots/statements/valid/0022.snap
index b80610e9..78671f80 100644
--- a/crates/parser/tests/snapshots/statements/valid/0022.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0022.snap
@@ -2,37 +2,49 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE TABLE products (\n    product_no integer,\n    name text,\n    price numeric DEFAULT 9.99\n);\n"
 ---
-CreateStmt@0..97
+CreateStmt@0..96
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   Table@7..12 "TABLE"
   Whitespace@12..13 " "
-  Ident@13..21 "products"
+  RangeVar@13..21
+    Ident@13..21 "products"
   Whitespace@21..22 " "
   Ascii40@22..23 "("
   Newline@23..24 "\n"
   Whitespace@24..28 "    "
-  Ident@28..38 "product_no"
-  Whitespace@38..39 " "
-  Integer@39..46 "integer"
+  ColumnDef@28..46
+    Ident@28..38 "product_no"
+    Whitespace@38..39 " "
+    TypeName@39..46
+      String@39..46
+        Integer@39..46 "integer"
   Ascii44@46..47 ","
   Newline@47..48 "\n"
   Whitespace@48..52 "    "
-  NameP@52..56 "name"
-  Whitespace@56..57 " "
-  TextP@57..61 "text"
+  ColumnDef@52..61
+    NameP@52..56 "name"
+    Whitespace@56..57 " "
+    TypeName@57..61
+      String@57..61
+        TextP@57..61 "text"
   Ascii44@61..62 ","
   Newline@62..63 "\n"
   Whitespace@63..67 "    "
-  Ident@67..72 "price"
-  Whitespace@72..73 " "
-  Numeric@73..80 "numeric"
-  Whitespace@80..81 " "
-  Default@81..88 "DEFAULT"
-  Whitespace@88..89 " "
-  Fconst@89..93 "9.99"
+  ColumnDef@67..93
+    Ident@67..72 "price"
+    Whitespace@72..73 " "
+    TypeName@73..80
+      String@73..80
+        Numeric@73..80 "numeric"
+    Whitespace@80..81 " "
+    Constraint@81..93
+      Default@81..88 "DEFAULT"
+      Whitespace@88..89 " "
+      AConst@89..93
+        Float@89..93
+          Fconst@89..93 "9.99"
   Newline@93..94 "\n"
   Ascii41@94..95 ")"
   Ascii59@95..96 ";"
-  Newline@96..97 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0023.snap b/crates/parser/tests/snapshots/statements/valid/0023.snap
index 2a8ccc2b..b9af2250 100644
--- a/crates/parser/tests/snapshots/statements/valid/0023.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0023.snap
@@ -2,71 +2,104 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE TABLE products (\n    product_no integer,\n    name text,\n    price numeric CHECK (price > 0),\n    discounted_price numeric CHECK (discounted_price > 0),\n    CHECK (price > discounted_price)\n);\n"
 ---
-CreateStmt@0..199
+CreateStmt@0..198
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   Table@7..12 "TABLE"
   Whitespace@12..13 " "
-  Ident@13..21 "products"
+  RangeVar@13..21
+    Ident@13..21 "products"
   Whitespace@21..22 " "
   Ascii40@22..23 "("
   Newline@23..24 "\n"
   Whitespace@24..28 "    "
-  Ident@28..38 "product_no"
-  Whitespace@38..39 " "
-  Integer@39..46 "integer"
+  ColumnDef@28..46
+    Ident@28..38 "product_no"
+    Whitespace@38..39 " "
+    TypeName@39..46
+      String@39..46
+        Integer@39..46 "integer"
   Ascii44@46..47 ","
   Newline@47..48 "\n"
   Whitespace@48..52 "    "
-  NameP@52..56 "name"
-  Whitespace@56..57 " "
-  TextP@57..61 "text"
+  ColumnDef@52..61
+    NameP@52..56 "name"
+    Whitespace@56..57 " "
+    TypeName@57..61
+      String@57..61
+        TextP@57..61 "text"
   Ascii44@61..62 ","
   Newline@62..63 "\n"
   Whitespace@63..67 "    "
-  Ident@67..72 "price"
-  Whitespace@72..73 " "
-  Numeric@73..80 "numeric"
-  Whitespace@80..81 " "
-  Check@81..86 "CHECK"
-  Whitespace@86..87 " "
-  Ascii40@87..88 "("
-  Ident@88..93 "price"
-  Whitespace@93..94 " "
-  Ascii62@94..95 ">"
-  Whitespace@95..96 " "
-  Iconst@96..97 "0"
-  Ascii41@97..98 ")"
+  ColumnDef@67..98
+    Ident@67..72 "price"
+    Whitespace@72..73 " "
+    TypeName@73..80
+      String@73..80
+        Numeric@73..80 "numeric"
+    Whitespace@80..81 " "
+    Constraint@81..98
+      Check@81..86 "CHECK"
+      Whitespace@86..87 " "
+      Ascii40@87..88 "("
+      AExpr@88..97
+        ColumnRef@88..93
+          String@88..93
+            Ident@88..93 "price"
+        Whitespace@93..94 " "
+        String@94..95
+          Ascii62@94..95 ">"
+        Whitespace@95..96 " "
+        AConst@96..97
+          Integer@96..97
+            Iconst@96..97 "0"
+      Ascii41@97..98 ")"
   Ascii44@98..99 ","
   Newline@99..100 "\n"
   Whitespace@100..104 "    "
-  Ident@104..120 "discounted_price"
-  Whitespace@120..121 " "
-  Numeric@121..128 "numeric"
-  Whitespace@128..129 " "
-  Check@129..134 "CHECK"
-  Whitespace@134..135 " "
-  Ascii40@135..136 "("
-  Ident@136..152 "discounted_price"
-  Whitespace@152..153 " "
-  Ascii62@153..154 ">"
-  Whitespace@154..155 " "
-  Iconst@155..156 "0"
-  Ascii41@156..157 ")"
+  ColumnDef@104..157
+    Ident@104..120 "discounted_price"
+    Whitespace@120..121 " "
+    TypeName@121..128
+      String@121..128
+        Numeric@121..128 "numeric"
+    Whitespace@128..129 " "
+    Constraint@129..157
+      Check@129..134 "CHECK"
+      Whitespace@134..135 " "
+      Ascii40@135..136 "("
+      AExpr@136..156
+        ColumnRef@136..152
+          String@136..152
+            Ident@136..152 "discounted_price"
+        Whitespace@152..153 " "
+        String@153..154
+          Ascii62@153..154 ">"
+        Whitespace@154..155 " "
+        AConst@155..156
+          Integer@155..156
+            Iconst@155..156 "0"
+      Ascii41@156..157 ")"
   Ascii44@157..158 ","
   Newline@158..159 "\n"
   Whitespace@159..163 "    "
-  Check@163..168 "CHECK"
-  Whitespace@168..169 " "
-  Ascii40@169..170 "("
-  Ident@170..175 "price"
-  Whitespace@175..176 " "
-  Ascii62@176..177 ">"
-  Whitespace@177..178 " "
-  Ident@178..194 "discounted_price"
-  Ascii41@194..195 ")"
+  Constraint@163..195
+    Check@163..168 "CHECK"
+    Whitespace@168..169 " "
+    Ascii40@169..170 "("
+    AExpr@170..194
+      ColumnRef@170..175
+        String@170..175
+          Ident@170..175 "price"
+      Whitespace@175..176 " "
+      String@176..177
+        Ascii62@176..177 ">"
+      Whitespace@177..178 " "
+      ColumnRef@178..194
+        String@178..194
+          Ident@178..194 "discounted_price"
+    Ascii41@194..195 ")"
   Newline@195..196 "\n"
   Ascii41@196..197 ")"
   Ascii59@197..198 ";"
-  Newline@198..199 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0024.snap b/crates/parser/tests/snapshots/statements/valid/0024.snap
index e4c63589..7e844839 100644
--- a/crates/parser/tests/snapshots/statements/valid/0024.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0024.snap
@@ -2,54 +2,70 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE TABLE order_items (\n    product_no integer REFERENCES products,\n    order_id integer REFERENCES orders,\n    quantity integer,\n    PRIMARY KEY (product_no, order_id)\n);\n"
 ---
-CreateStmt@0..175
+CreateStmt@0..174
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   Table@7..12 "TABLE"
   Whitespace@12..13 " "
-  Ident@13..24 "order_items"
+  RangeVar@13..24
+    Ident@13..24 "order_items"
   Whitespace@24..25 " "
   Ascii40@25..26 "("
   Newline@26..27 "\n"
   Whitespace@27..31 "    "
-  Ident@31..41 "product_no"
-  Whitespace@41..42 " "
-  Integer@42..49 "integer"
-  Whitespace@49..50 " "
-  References@50..60 "REFERENCES"
-  Whitespace@60..61 " "
-  Ident@61..69 "products"
+  ColumnDef@31..69
+    Ident@31..41 "product_no"
+    Whitespace@41..42 " "
+    TypeName@42..49
+      String@42..49
+        Integer@42..49 "integer"
+    Whitespace@49..50 " "
+    Constraint@50..69
+      References@50..60 "REFERENCES"
+      Whitespace@60..61 " "
+      RangeVar@61..69
+        Ident@61..69 "products"
   Ascii44@69..70 ","
   Newline@70..71 "\n"
   Whitespace@71..75 "    "
-  Ident@75..83 "order_id"
-  Whitespace@83..84 " "
-  Integer@84..91 "integer"
-  Whitespace@91..92 " "
-  References@92..102 "REFERENCES"
-  Whitespace@102..103 " "
-  Ident@103..109 "orders"
+  ColumnDef@75..109
+    Ident@75..83 "order_id"
+    Whitespace@83..84 " "
+    TypeName@84..91
+      String@84..91
+        Integer@84..91 "integer"
+    Whitespace@91..92 " "
+    Constraint@92..109
+      References@92..102 "REFERENCES"
+      Whitespace@102..103 " "
+      RangeVar@103..109
+        Ident@103..109 "orders"
   Ascii44@109..110 ","
   Newline@110..111 "\n"
   Whitespace@111..115 "    "
-  Ident@115..123 "quantity"
-  Whitespace@123..124 " "
-  Integer@124..131 "integer"
+  ColumnDef@115..131
+    Ident@115..123 "quantity"
+    Whitespace@123..124 " "
+    TypeName@124..131
+      String@124..131
+        Integer@124..131 "integer"
   Ascii44@131..132 ","
   Newline@132..133 "\n"
   Whitespace@133..137 "    "
-  Primary@137..144 "PRIMARY"
-  Whitespace@144..145 " "
-  Key@145..148 "KEY"
-  Whitespace@148..149 " "
-  Ascii40@149..150 "("
-  Ident@150..160 "product_no"
-  Ascii44@160..161 ","
-  Whitespace@161..162 " "
-  Ident@162..170 "order_id"
-  Ascii41@170..171 ")"
+  Constraint@137..171
+    Primary@137..144 "PRIMARY"
+    Whitespace@144..145 " "
+    Key@145..148 "KEY"
+    Whitespace@148..149 " "
+    Ascii40@149..150 "("
+    String@150..160
+      Ident@150..160 "product_no"
+    Ascii44@160..161 ","
+    Whitespace@161..162 " "
+    String@162..170
+      Ident@162..170 "order_id"
+    Ascii41@170..171 ")"
   Newline@171..172 "\n"
   Ascii41@172..173 ")"
   Ascii59@173..174 ";"
-  Newline@174..175 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0025.snap b/crates/parser/tests/snapshots/statements/valid/0025.snap
index cc279acf..0b8b6f2a 100644
--- a/crates/parser/tests/snapshots/statements/valid/0025.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0025.snap
@@ -2,24 +2,30 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "ALTER TABLE products ADD CHECK (name <> '');\n"
 ---
-AlterTableStmt@0..45
+AlterTableStmt@0..44
   Alter@0..5 "ALTER"
   Whitespace@5..6 " "
   Table@6..11 "TABLE"
   Whitespace@11..12 " "
-  Ident@12..20 "products"
+  RangeVar@12..20
+    Ident@12..20 "products"
   Whitespace@20..21 " "
   AddP@21..24 "ADD"
   Whitespace@24..25 " "
-  Check@25..30 "CHECK"
-  Whitespace@30..31 " "
-  Ascii40@31..32 "("
-  NameP@32..36 "name"
-  Whitespace@36..37 " "
-  NotEquals@37..39 "<>"
+  AlterTableCmd@25..39
+    Constraint@25..39
+      Check@25..30 "CHECK"
+      Whitespace@30..31 " "
+      Ascii40@31..32 "("
+      AExpr@32..39
+        ColumnRef@32..36
+          String@32..36
+            NameP@32..36 "name"
+        Whitespace@36..37 " "
+        String@37..39
+          NotEquals@37..39 "<>"
   Whitespace@39..40 " "
   Sconst@40..42 "''"
   Ascii41@42..43 ")"
   Ascii59@43..44 ";"
-  Newline@44..45 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0026.snap b/crates/parser/tests/snapshots/statements/valid/0026.snap
index 730b2138..a575f9dc 100644
--- a/crates/parser/tests/snapshots/statements/valid/0026.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0026.snap
@@ -2,27 +2,35 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "ALTER TABLE products ALTER COLUMN price TYPE numeric(10,2);\n"
 ---
-AlterTableStmt@0..60
+AlterTableStmt@0..59
   Alter@0..5 "ALTER"
   Whitespace@5..6 " "
   Table@6..11 "TABLE"
   Whitespace@11..12 " "
-  Ident@12..20 "products"
+  RangeVar@12..20
+    Ident@12..20 "products"
   Whitespace@20..21 " "
   Alter@21..26 "ALTER"
   Whitespace@26..27 " "
   Column@27..33 "COLUMN"
   Whitespace@33..34 " "
-  Ident@34..39 "price"
-  Whitespace@39..40 " "
-  TypeP@40..44 "TYPE"
-  Whitespace@44..45 " "
-  Numeric@45..52 "numeric"
-  Ascii40@52..53 "("
-  Iconst@53..55 "10"
-  Ascii44@55..56 ","
-  Iconst@56..57 "2"
-  Ascii41@57..58 ")"
+  AlterTableCmd@34..58
+    ColumnDef@34..58
+      Ident@34..39 "price"
+      Whitespace@39..40 " "
+      TypeP@40..44 "TYPE"
+      Whitespace@44..45 " "
+      TypeName@45..58
+        String@45..52
+          Numeric@45..52 "numeric"
+        Ascii40@52..53 "("
+        AConst@53..55
+          Integer@53..55
+            Iconst@53..55 "10"
+        Ascii44@55..56 ","
+        AConst@56..57
+          Integer@56..57
+            Iconst@56..57 "2"
+        Ascii41@57..58 ")"
   Ascii59@58..59 ";"
-  Newline@59..60 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0027.snap b/crates/parser/tests/snapshots/statements/valid/0027.snap
index 2b88e432..d4354e7d 100644
--- a/crates/parser/tests/snapshots/statements/valid/0027.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0027.snap
@@ -2,18 +2,20 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "GRANT UPDATE ON accounts TO joe;\n"
 ---
-GrantStmt@0..33
+GrantStmt@0..32
   Grant@0..5 "GRANT"
   Whitespace@5..6 " "
-  Update@6..12 "UPDATE"
+  AccessPriv@6..12
+    Update@6..12 "UPDATE"
   Whitespace@12..13 " "
   On@13..15 "ON"
   Whitespace@15..16 " "
-  Ident@16..24 "accounts"
+  RangeVar@16..24
+    Ident@16..24 "accounts"
   Whitespace@24..25 " "
   To@25..27 "TO"
   Whitespace@27..28 " "
-  Ident@28..31 "joe"
+  RoleSpec@28..31
+    Ident@28..31 "joe"
   Ascii59@31..32 ";"
-  Newline@32..33 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0028.snap b/crates/parser/tests/snapshots/statements/valid/0028.snap
index c9ca0372..05a72aba 100644
--- a/crates/parser/tests/snapshots/statements/valid/0028.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0028.snap
@@ -2,18 +2,18 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "REVOKE ALL ON accounts FROM PUBLIC;\n"
 ---
-GrantStmt@0..36
+GrantStmt@0..35
   Revoke@0..6 "REVOKE"
   Whitespace@6..7 " "
   All@7..10 "ALL"
   Whitespace@10..11 " "
   On@11..13 "ON"
   Whitespace@13..14 " "
-  Ident@14..22 "accounts"
+  RangeVar@14..22
+    Ident@14..22 "accounts"
   Whitespace@22..23 " "
   From@23..27 "FROM"
   Whitespace@27..28 " "
   Ident@28..34 "PUBLIC"
   Ascii59@34..35 ";"
-  Newline@35..36 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0029.snap b/crates/parser/tests/snapshots/statements/valid/0029.snap
index ae280f52..4c2bcacc 100644
--- a/crates/parser/tests/snapshots/statements/valid/0029.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0029.snap
@@ -2,29 +2,34 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "GRANT SELECT (col1), UPDATE (col1) ON mytable TO miriam_rw;\n"
 ---
-GrantStmt@0..60
+GrantStmt@0..59
   Grant@0..5 "GRANT"
   Whitespace@5..6 " "
-  Select@6..12 "SELECT"
-  Whitespace@12..13 " "
-  Ascii40@13..14 "("
-  Ident@14..18 "col1"
-  Ascii41@18..19 ")"
+  AccessPriv@6..19
+    Select@6..12 "SELECT"
+    Whitespace@12..13 " "
+    Ascii40@13..14 "("
+    String@14..18
+      Ident@14..18 "col1"
+    Ascii41@18..19 ")"
   Ascii44@19..20 ","
   Whitespace@20..21 " "
-  Update@21..27 "UPDATE"
-  Whitespace@27..28 " "
-  Ascii40@28..29 "("
-  Ident@29..33 "col1"
-  Ascii41@33..34 ")"
+  AccessPriv@21..34
+    Update@21..27 "UPDATE"
+    Whitespace@27..28 " "
+    Ascii40@28..29 "("
+    String@29..33
+      Ident@29..33 "col1"
+    Ascii41@33..34 ")"
   Whitespace@34..35 " "
   On@35..37 "ON"
   Whitespace@37..38 " "
-  Ident@38..45 "mytable"
+  RangeVar@38..45
+    Ident@38..45 "mytable"
   Whitespace@45..46 " "
   To@46..48 "TO"
   Whitespace@48..49 " "
-  Ident@49..58 "miriam_rw"
+  RoleSpec@49..58
+    Ident@49..58 "miriam_rw"
   Ascii59@58..59 ";"
-  Newline@59..60 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0030.snap b/crates/parser/tests/snapshots/statements/valid/0030.snap
index 68c40f34..e3f2bb1e 100644
--- a/crates/parser/tests/snapshots/statements/valid/0030.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0030.snap
@@ -2,7 +2,7 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE POLICY account_managers ON accounts TO managers\n    USING (manager = current_user);\n"
 ---
-CreatePolicyStmt@0..91
+CreatePolicyStmt@0..90
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   Policy@7..13 "POLICY"
@@ -11,22 +11,28 @@ CreatePolicyStmt@0..91
   Whitespace@30..31 " "
   On@31..33 "ON"
   Whitespace@33..34 " "
-  Ident@34..42 "accounts"
+  RangeVar@34..42
+    Ident@34..42 "accounts"
   Whitespace@42..43 " "
   To@43..45 "TO"
   Whitespace@45..46 " "
-  Ident@46..54 "managers"
+  RoleSpec@46..54
+    Ident@46..54 "managers"
   Newline@54..55 "\n"
   Whitespace@55..59 "    "
   Using@59..64 "USING"
   Whitespace@64..65 " "
   Ascii40@65..66 "("
-  Ident@66..73 "manager"
-  Whitespace@73..74 " "
-  Ascii61@74..75 "="
-  Whitespace@75..76 " "
-  CurrentUser@76..88 "current_user"
+  AExpr@66..88
+    ColumnRef@66..73
+      String@66..73
+        Ident@66..73 "manager"
+    Whitespace@73..74 " "
+    String@74..75
+      Ascii61@74..75 "="
+    Whitespace@75..76 " "
+    SqlvalueFunction@76..88
+      CurrentUser@76..88 "current_user"
   Ascii41@88..89 ")"
   Ascii59@89..90 ";"
-  Newline@90..91 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0031.snap b/crates/parser/tests/snapshots/statements/valid/0031.snap
index f3456e11..42afd22e 100644
--- a/crates/parser/tests/snapshots/statements/valid/0031.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0031.snap
@@ -2,7 +2,7 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE POLICY user_mod ON passwd FOR UPDATE\n  USING (current_user = user_name)\n  WITH CHECK (\n    current_user = user_name AND\n    shell IN ('/bin/bash','/bin/sh','/bin/dash','/bin/zsh','/bin/tcsh')\n  );\n"
 ---
-CreatePolicyStmt@0..204
+CreatePolicyStmt@0..203
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   Policy@7..13 "POLICY"
@@ -11,7 +11,8 @@ CreatePolicyStmt@0..204
   Whitespace@22..23 " "
   On@23..25 "ON"
   Whitespace@25..26 " "
-  Ident@26..32 "passwd"
+  RangeVar@26..32
+    Ident@26..32 "passwd"
   Whitespace@32..33 " "
   For@33..36 "FOR"
   Whitespace@36..37 " "
@@ -21,48 +22,72 @@ CreatePolicyStmt@0..204
   Using@46..51 "USING"
   Whitespace@51..52 " "
   Ascii40@52..53 "("
-  CurrentUser@53..65 "current_user"
-  Whitespace@65..66 " "
-  Ascii61@66..67 "="
-  Whitespace@67..68 " "
-  Ident@68..77 "user_name"
-  Ascii41@77..78 ")"
-  Newline@78..79 "\n"
-  Whitespace@79..81 "  "
-  With@81..85 "WITH"
-  Whitespace@85..86 " "
-  Check@86..91 "CHECK"
-  Whitespace@91..92 " "
-  Ascii40@92..93 "("
-  Newline@93..94 "\n"
-  Whitespace@94..98 "    "
-  CurrentUser@98..110 "current_user"
-  Whitespace@110..111 " "
-  Ascii61@111..112 "="
-  Whitespace@112..113 " "
-  Ident@113..122 "user_name"
-  Whitespace@122..123 " "
-  And@123..126 "AND"
-  Newline@126..127 "\n"
-  Whitespace@127..131 "    "
-  Ident@131..136 "shell"
-  Whitespace@136..137 " "
-  InP@137..139 "IN"
-  Whitespace@139..140 " "
-  Ascii40@140..141 "("
-  Sconst@141..152 "'/bin/bash'"
-  Ascii44@152..153 ","
-  Sconst@153..162 "'/bin/sh'"
-  Ascii44@162..163 ","
-  Sconst@163..174 "'/bin/dash'"
-  Ascii44@174..175 ","
-  Sconst@175..185 "'/bin/zsh'"
-  Ascii44@185..186 ","
-  Sconst@186..197 "'/bin/tcsh'"
-  Ascii41@197..198 ")"
-  Newline@198..199 "\n"
-  Whitespace@199..201 "  "
-  Ascii41@201..202 ")"
+  AExpr@53..202
+    SqlvalueFunction@53..65
+      CurrentUser@53..65 "current_user"
+    Whitespace@65..66 " "
+    BoolExpr@66..202
+      AExpr@66..202
+        String@66..67
+          Ascii61@66..67 "="
+        Whitespace@67..68 " "
+        ColumnRef@68..77
+          String@68..77
+            Ident@68..77 "user_name"
+        Ascii41@77..78 ")"
+        Newline@78..79 "\n"
+        Whitespace@79..81 "  "
+        With@81..85 "WITH"
+        Whitespace@85..86 " "
+        Check@86..91 "CHECK"
+        Whitespace@91..92 " "
+        Ascii40@92..93 "("
+        Newline@93..94 "\n"
+        Whitespace@94..98 "    "
+        SqlvalueFunction@98..110
+          CurrentUser@98..110 "current_user"
+        Whitespace@110..111 " "
+        AExpr@111..202
+          String@111..112
+            Ascii61@111..112 "="
+          Whitespace@112..113 " "
+          ColumnRef@113..122
+            String@113..122
+              Ident@113..122 "user_name"
+          Whitespace@122..123 " "
+          And@123..126 "AND"
+          Newline@126..127 "\n"
+          Whitespace@127..131 "    "
+          ColumnRef@131..136
+            String@131..136
+              Ident@131..136 "shell"
+          Whitespace@136..137 " "
+          InP@137..139 "IN"
+          Whitespace@139..140 " "
+          Ascii40@140..141 "("
+          List@141..197
+            AConst@141..152
+              String@141..152
+                Sconst@141..152 "'/bin/bash'"
+            Ascii44@152..153 ","
+            AConst@153..162
+              String@153..162
+                Sconst@153..162 "'/bin/sh'"
+            Ascii44@162..163 ","
+            AConst@163..174
+              String@163..174
+                Sconst@163..174 "'/bin/dash'"
+            Ascii44@174..175 ","
+            AConst@175..185
+              String@175..185
+                Sconst@175..185 "'/bin/zsh'"
+            Ascii44@185..186 ","
+            AConst@186..197
+              String@186..197
+                Sconst@186..197 "'/bin/tcsh'"
+          Ascii41@197..198 ")"
+          Newline@198..199 "\n"
+          Whitespace@199..201 "  "
+          Ascii41@201..202 ")"
   Ascii59@202..203 ";"
-  Newline@203..204 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0032.snap b/crates/parser/tests/snapshots/statements/valid/0032.snap
index d82132b6..772fbfbc 100644
--- a/crates/parser/tests/snapshots/statements/valid/0032.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0032.snap
@@ -2,16 +2,19 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "SET search_path TO myschema,public;\n"
 ---
-VariableSetStmt@0..36
+VariableSetStmt@0..35
   Set@0..3 "SET"
   Whitespace@3..4 " "
   Ident@4..15 "search_path"
   Whitespace@15..16 " "
   To@16..18 "TO"
   Whitespace@18..19 " "
-  Ident@19..27 "myschema"
+  AConst@19..27
+    String@19..27
+      Ident@19..27 "myschema"
   Ascii44@27..28 ","
-  Ident@28..34 "public"
+  AConst@28..34
+    String@28..34
+      Ident@28..34 "public"
   Ascii59@34..35 ";"
-  Newline@35..36 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0033.snap b/crates/parser/tests/snapshots/statements/valid/0033.snap
index dbef84a2..f871cd80 100644
--- a/crates/parser/tests/snapshots/statements/valid/0033.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0033.snap
@@ -2,19 +2,23 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE TABLE measurement (\n    city_id         int not null,\n    logdate         date not null,\n    peaktemp        int,\n    unitsales       int\n) PARTITION BY RANGE (logdate);\n"
 ---
-CreateStmt@0..177
+CreateStmt@0..176
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   Table@7..12 "TABLE"
   Whitespace@12..13 " "
-  Ident@13..24 "measurement"
+  RangeVar@13..24
+    Ident@13..24 "measurement"
   Whitespace@24..25 " "
   Ascii40@25..26 "("
   Newline@26..27 "\n"
   Whitespace@27..31 "    "
-  Ident@31..38 "city_id"
-  Whitespace@38..47 "         "
-  IntP@47..50 "int"
+  ColumnDef@31..50
+    Ident@31..38 "city_id"
+    Whitespace@38..47 "         "
+    TypeName@47..50
+      String@47..50
+        IntP@47..50 "int"
   Whitespace@50..51 " "
   Not@51..54 "not"
   Whitespace@54..55 " "
@@ -22,9 +26,12 @@ CreateStmt@0..177
   Ascii44@59..60 ","
   Newline@60..61 "\n"
   Whitespace@61..65 "    "
-  Ident@65..72 "logdate"
-  Whitespace@72..81 "         "
-  Ident@81..85 "date"
+  ColumnDef@65..85
+    Ident@65..72 "logdate"
+    Whitespace@72..81 "         "
+    TypeName@81..85
+      String@81..85
+        Ident@81..85 "date"
   Whitespace@85..86 " "
   Not@86..89 "not"
   Whitespace@89..90 " "
@@ -32,27 +39,34 @@ CreateStmt@0..177
   Ascii44@94..95 ","
   Newline@95..96 "\n"
   Whitespace@96..100 "    "
-  Ident@100..108 "peaktemp"
-  Whitespace@108..116 "        "
-  IntP@116..119 "int"
+  ColumnDef@100..119
+    Ident@100..108 "peaktemp"
+    Whitespace@108..116 "        "
+    TypeName@116..119
+      String@116..119
+        IntP@116..119 "int"
   Ascii44@119..120 ","
   Newline@120..121 "\n"
   Whitespace@121..125 "    "
-  Ident@125..134 "unitsales"
-  Whitespace@134..141 "       "
-  IntP@141..144 "int"
+  ColumnDef@125..144
+    Ident@125..134 "unitsales"
+    Whitespace@134..141 "       "
+    TypeName@141..144
+      String@141..144
+        IntP@141..144 "int"
   Newline@144..145 "\n"
   Ascii41@145..146 ")"
   Whitespace@146..147 " "
-  Partition@147..156 "PARTITION"
-  Whitespace@156..157 " "
-  By@157..159 "BY"
-  Whitespace@159..160 " "
-  Range@160..165 "RANGE"
-  Whitespace@165..166 " "
-  Ascii40@166..167 "("
-  Ident@167..174 "logdate"
-  Ascii41@174..175 ")"
+  PartitionSpec@147..175
+    Partition@147..156 "PARTITION"
+    Whitespace@156..157 " "
+    By@157..159 "BY"
+    Whitespace@159..160 " "
+    Range@160..165 "RANGE"
+    Whitespace@165..166 " "
+    Ascii40@166..167 "("
+    PartitionElem@167..174
+      Ident@167..174 "logdate"
+    Ascii41@174..175 ")"
   Ascii59@175..176 ";"
-  Newline@176..177 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0034.snap b/crates/parser/tests/snapshots/statements/valid/0034.snap
index efede445..356bacab 100644
--- a/crates/parser/tests/snapshots/statements/valid/0034.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0034.snap
@@ -2,24 +2,36 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "select *,some_col from contact where id = '123 4 5';\n"
 ---
-SelectStmt@0..53
+SelectStmt@0..52
   Select@0..6 "select"
   Whitespace@6..7 " "
-  Ascii42@7..8 "*"
+  ResTarget@7..8
+    ColumnRef@7..8
+      AStar@7..8
+        Ascii42@7..8 "*"
   Ascii44@8..9 ","
-  Ident@9..17 "some_col"
+  ResTarget@9..17
+    ColumnRef@9..17
+      String@9..17
+        Ident@9..17 "some_col"
   Whitespace@17..18 " "
   From@18..22 "from"
   Whitespace@22..23 " "
-  Ident@23..30 "contact"
+  RangeVar@23..30
+    Ident@23..30 "contact"
   Whitespace@30..31 " "
   Where@31..36 "where"
   Whitespace@36..37 " "
-  Ident@37..39 "id"
-  Whitespace@39..40 " "
-  Ascii61@40..41 "="
-  Whitespace@41..42 " "
-  Sconst@42..51 "'123 4 5'"
+  AExpr@37..51
+    ColumnRef@37..39
+      String@37..39
+        Ident@37..39 "id"
+    Whitespace@39..40 " "
+    String@40..41
+      Ascii61@40..41 "="
+    Whitespace@41..42 " "
+    AConst@42..51
+      String@42..51
+        Sconst@42..51 "'123 4 5'"
   Ascii59@51..52 ";"
-  Newline@52..53 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0035.snap b/crates/parser/tests/snapshots/statements/valid/0035.snap
index efede445..356bacab 100644
--- a/crates/parser/tests/snapshots/statements/valid/0035.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0035.snap
@@ -2,24 +2,36 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "select *,some_col from contact where id = '123 4 5';\n"
 ---
-SelectStmt@0..53
+SelectStmt@0..52
   Select@0..6 "select"
   Whitespace@6..7 " "
-  Ascii42@7..8 "*"
+  ResTarget@7..8
+    ColumnRef@7..8
+      AStar@7..8
+        Ascii42@7..8 "*"
   Ascii44@8..9 ","
-  Ident@9..17 "some_col"
+  ResTarget@9..17
+    ColumnRef@9..17
+      String@9..17
+        Ident@9..17 "some_col"
   Whitespace@17..18 " "
   From@18..22 "from"
   Whitespace@22..23 " "
-  Ident@23..30 "contact"
+  RangeVar@23..30
+    Ident@23..30 "contact"
   Whitespace@30..31 " "
   Where@31..36 "where"
   Whitespace@36..37 " "
-  Ident@37..39 "id"
-  Whitespace@39..40 " "
-  Ascii61@40..41 "="
-  Whitespace@41..42 " "
-  Sconst@42..51 "'123 4 5'"
+  AExpr@37..51
+    ColumnRef@37..39
+      String@37..39
+        Ident@37..39 "id"
+    Whitespace@39..40 " "
+    String@40..41
+      Ascii61@40..41 "="
+    Whitespace@41..42 " "
+    AConst@42..51
+      String@42..51
+        Sconst@42..51 "'123 4 5'"
   Ascii59@51..52 ";"
-  Newline@52..53 "\n"
 
diff --git a/crates/parser/tests/snapshots/statements/valid/0036.snap b/crates/parser/tests/snapshots/statements/valid/0036.snap
index 56b13946..2f52a5d4 100644
--- a/crates/parser/tests/snapshots/statements/valid/0036.snap
+++ b/crates/parser/tests/snapshots/statements/valid/0036.snap
@@ -2,41 +2,55 @@
 source: crates/parser/tests/statement_parser_test.rs
 description: "CREATE FUNCTION dup(in int, out f1 int, out f2 text)\n    AS $$ SELECT $1, CAST($1 AS text) || ' is text' $$\n    LANGUAGE SQL;\n"
 ---
-CreateFunctionStmt@0..126
+CreateFunctionStmt@0..125
   Create@0..6 "CREATE"
   Whitespace@6..7 " "
   Function@7..15 "FUNCTION"
   Whitespace@15..16 " "
-  Ident@16..19 "dup"
+  String@16..19
+    Ident@16..19 "dup"
   Ascii40@19..20 "("
   InP@20..22 "in"
   Whitespace@22..23 " "
-  IntP@23..26 "int"
+  FunctionParameter@23..26
+    TypeName@23..26
+      String@23..26
+        IntP@23..26 "int"
   Ascii44@26..27 ","
   Whitespace@27..28 " "
   OutP@28..31 "out"
   Whitespace@31..32 " "
-  Ident@32..34 "f1"
-  Whitespace@34..35 " "
-  IntP@35..38 "int"
+  FunctionParameter@32..38
+    Ident@32..34 "f1"
+    Whitespace@34..35 " "
+    TypeName@35..38
+      String@35..38
+        IntP@35..38 "int"
   Ascii44@38..39 ","
   Whitespace@39..40 " "
   OutP@40..43 "out"
   Whitespace@43..44 " "
-  Ident@44..46 "f2"
-  Whitespace@46..47 " "
-  TextP@47..51 "text"
+  FunctionParameter@44..51
+    Ident@44..46 "f2"
+    Whitespace@46..47 " "
+    TypeName@47..51
+      String@47..51
+        TextP@47..51 "text"
   Ascii41@51..52 ")"
   Newline@52..53 "\n"
   Whitespace@53..57 "    "
-  As@57..59 "AS"
-  Whitespace@59..60 " "
-  Sconst@60..107 "$$ SELECT $1, CAST($1 ..."
+  DefElem@57..107
+    As@57..59 "AS"
+    Whitespace@59..60 " "
+    List@60..107
+      String@60..107
+        Sconst@60..107 "$$ SELECT $1, CAST($1 ..."
   Newline@107..108 "\n"
   Whitespace@108..112 "    "
-  Language@112..120 "LANGUAGE"
-  Whitespace@120..121 " "
-  SqlP@121..124 "SQL"
+  DefElem@112..124
+    Language@112..120 "LANGUAGE"
+    Whitespace@120..121 " "
+    String@121..124
+      SqlP@121..124 "SQL"
   Ascii59@124..125 ";"
-  Newline@125..126 "\n"
 

From e65f8de52880cae5a3739b695811676f40c22264 Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Thu, 12 Oct 2023 22:16:33 +0200
Subject: [PATCH 14/16] chore: cleanup

---
 crates/parser/src/estimate_node_range.rs | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/crates/parser/src/estimate_node_range.rs b/crates/parser/src/estimate_node_range.rs
index a55ffb65..524efe96 100644
--- a/crates/parser/src/estimate_node_range.rs
+++ b/crates/parser/src/estimate_node_range.rs
@@ -82,13 +82,9 @@ pub fn estimate_node_range(
 
     let mut ranged_nodes: Vec<RangedNode> = Vec::new();
 
-    // we get an estimated range by searching for tokens that match the node property values
-    // and, if available, the `location` of the node itself
     nodes.iter().enumerate().for_each(|(idx, n)| {
         let child_token_range = child_token_ranges[idx];
 
-        println!("node: {:#?}, child_token_range: {:?}", n, child_token_range);
-
         let child_node_ranges = ranged_nodes
             .iter()
             .filter(|x| x.inner.path.starts_with(n.path.as_str()))

From 4d97205f723957414be9d20e7669f8958196f954 Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Fri, 13 Oct 2023 17:10:20 +0200
Subject: [PATCH 15/16] chore: add comments

---
 crates/parser/src/lib.rs              |  8 ++++----
 crates/parser/src/statement_parser.rs | 15 ++++++++++-----
 crates/parser/src/syntax_error.rs     |  2 +-
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index 493b49cb..2a24b634 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -3,17 +3,17 @@
 //! This crate provides a parser for the Postgres SQL dialect.
 //! It is based in the pg_query.rs crate, which is a wrapper around the PostgreSQL query parser.
 //! The main `Parser` struct parses a source file and individual statements.
-//! The `Parse` struct contains the resulting concrete syntax tree, syntax errors, and the abtract syntax tree, which is a list of pg_query statements and their positions.
+//! The `Parse` result struct contains the resulting concrete syntax tree, syntax errors, and the abstract syntax tree, which is a list of pg_query statements and their positions.
 //!
 //! The idea is to offload the heavy lifting to the same parser that the PostgreSQL server uses,
-//! and just fill in the gaps to be able to build both cst and ast from a a source file that
+//! and just fill in the gaps to be able to build both cst and ast from a source file that
 //! potentially contains erroneous statements.
 //!
 //! The main drawbacks of the PostgreSQL query parser mitigated by this parser are:
 //! - it only parsed a full source text, and if there is any syntax error in a file, it will not parse anything and return an error.
-//! - it does not parse whitespaces and newlines, so it is not possible to build a concrete syntax tree build a concrete syntax tree.
+//! - it does not parse whitespaces and newlines, and it only returns ast nodes. The concrete syntax tree has to be reverse-engineered.
 //!
-//! To see how these drawbacks are mitigated, see the `statement.rs` and the `source_file.rs` module.
+//! To see how these drawbacks are mitigated, see the `statement_parser.rs` and the `source_parser.rs` module.
 
 mod ast_node;
 mod estimate_node_range;
diff --git a/crates/parser/src/statement_parser.rs b/crates/parser/src/statement_parser.rs
index 46e7126d..25d35511 100644
--- a/crates/parser/src/statement_parser.rs
+++ b/crates/parser/src/statement_parser.rs
@@ -9,10 +9,7 @@ use crate::{
     syntax_kind_codegen::SyntaxKind,
 };
 
-/// A super simple lexer for sql statements.
-///
-/// One weakness of pg_query.rs is that it does not parse whitespace or newlines. We use a very
-/// simple lexer to fill the gaps.
+/// Super simple lexer that only catches the tokens that libpg_query ignores.
 #[derive(Logos, Debug, PartialEq)]
 pub enum StatementToken {
     // comments and whitespaces
@@ -27,7 +24,7 @@ pub enum StatementToken {
 }
 
 impl StatementToken {
-    /// Creates a `SyntaxKind` from a `StatementToken`.
+    /// Create a `SyntaxKind` from a `StatementToken`.
     pub fn syntax_kind(&self) -> SyntaxKind {
         match self {
             StatementToken::Whitespace => SyntaxKind::Whitespace,
@@ -39,6 +36,13 @@ impl StatementToken {
 }
 
 impl Parser {
+    /// Parse a single statement passed in `text`. If `at_offset` is `Some`, the statement is assumed to be at that offset in the source file.
+    ///
+    /// On a high level, the parser works as follows:
+    /// - 1. Collect all information from pg_query.rs and `StatementToken` lexer
+    /// - 2. Derive as much information as possible from the collected information
+    /// - 3. Collect AST node and errors, if any
+    /// - 4. Walk the statement token by token, and reverse-engineer the concrete syntax tree
     pub fn parse_statement_at(&mut self, text: &str, at_offset: Option<u32>) {
         // 1. Collect as much information as possible from pg_query.rs and `StatementToken` lexer
 
@@ -98,6 +102,7 @@ impl Parser {
         let mut statement_token_lexer = StatementToken::lexer(&text);
 
         // 2. Setup data structures required for the parsing algorithm
+
         // A buffer for tokens that are not applied immediately to the cst
         let mut token_buffer: VecDeque<(SyntaxKind, String)> = VecDeque::new();
         // Keeps track of currently open nodes. Latest opened is last.
diff --git a/crates/parser/src/syntax_error.rs b/crates/parser/src/syntax_error.rs
index d8dee689..df6d670f 100644
--- a/crates/parser/src/syntax_error.rs
+++ b/crates/parser/src/syntax_error.rs
@@ -2,7 +2,7 @@ use std::fmt;
 
 use cstree::text::{TextRange, TextSize};
 
-/// Represents the result of unsuccessful tokenization, parsing
+/// Represents the result of unsuccessful tokenization, parsing,
 /// or tree validation.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct SyntaxError(String, TextRange);

From 197f3512aecbdb2ee1cdb3db8da118f0f66005e8 Mon Sep 17 00:00:00 2001
From: psteinroe <philipp@steinroetter.com>
Date: Fri, 13 Oct 2023 17:12:46 +0200
Subject: [PATCH 16/16] chore: add comments

---
 crates/parser/src/estimate_node_range.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/crates/parser/src/estimate_node_range.rs b/crates/parser/src/estimate_node_range.rs
index 524efe96..8c8e514b 100644
--- a/crates/parser/src/estimate_node_range.rs
+++ b/crates/parser/src/estimate_node_range.rs
@@ -53,9 +53,9 @@ pub fn estimate_node_range(
         };
     });
 
-    // second iteration using the nearest parent from the first
+    // second iteration using the nearest parent from the first, or the location of the nearest
+    // parent node
     for idx in too_many_tokens_at {
-        // get the nearest parent location
         let nearest_parent_start =
             get_nearest_parent_start(&nodes[idx], &nodes, &child_token_ranges);
         let nearest_parent_location = get_nearest_parent_location(&nodes[idx], &nodes);
@@ -90,7 +90,7 @@ pub fn estimate_node_range(
             .filter(|x| x.inner.path.starts_with(n.path.as_str()))
             .collect::<Vec<&RangedNode>>();
 
-        // get `from` location
+        // get `from` location as the smaller value of the location of the node, the start of all children nodes, and the start of the first child token
         let node_location = match get_location(&n.node) {
             Some(l) => Some(TextSize::from(l)),
             None => None,
@@ -141,6 +141,7 @@ pub fn estimate_node_range(
         let to = to_locations.iter().filter(|v| v.is_some()).max();
 
         if from.is_some() && to.is_some() {
+            // only keep nodes that have a range. Nodes without one are not relevant for the cst.
             ranged_nodes.push(RangedNode {
                 inner: n.to_owned(),
                 range: TextRange::new(from.unwrap().unwrap(), to.unwrap().unwrap()),