Skip to content

Commit 05ea6d0

Browse files
committed
Port incremental parse to CodeBlockItem
Use `LookaheadTracker` to track the furthest offset the parser reached, and use `LookaheadRanges` to record that offset as the source range that might affect the parse of a node. We can then use that information to judge whether a node can be reused.
1 parent 1555299 commit 05ea6d0

File tree

13 files changed

+376
-58
lines changed

13 files changed

+376
-58
lines changed

CodeGeneration/Sources/generate-swiftsyntax/templates/swiftparser/ParserEntryFile.swift

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
2424
/// Parse the source code in the given string as Swift source file. See
2525
/// `Parser.init` for more details.
2626
public static func parse(
27-
source: String,
28-
parseTransition: IncrementalParseTransition? = nil
27+
source: String
2928
) -> SourceFileSyntax {
3029
var parser = Parser(source)
3130
return SourceFileSyntax.parse(from: &parser)
@@ -35,18 +34,66 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
3534

3635
DeclSyntax(
3736
"""
38-
/// Parse the source code in the given string as Swift source file. See
37+
/// Parse the source code in the given buffer as Swift source file. See
3938
/// `Parser.init` for more details.
4039
public static func parse(
4140
source: UnsafeBufferPointer<UInt8>,
42-
maximumNestingLevel: Int? = nil,
43-
parseTransition: IncrementalParseTransition? = nil
41+
maximumNestingLevel: Int? = nil
4442
) -> SourceFileSyntax {
4543
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel)
4644
return SourceFileSyntax.parse(from: &parser)
4745
}
4846
"""
4947
)
48+
49+
DeclSyntax(
50+
"""
51+
/// Parse the source code in the given string as Swift source file with support
52+
/// for incremental parsing.
53+
///
54+
/// When parsing a source file for the first time, invoke `parseIncrementally`
55+
/// with `parseTransition: nil`. This returns the initial tree as well as
56+
/// ``LookaheadRanges``. If an edit is made to the source file, an
57+
/// ``IncrementalParseTransition`` can be constructed from the initial tree
58+
/// and its ``LookaheadRanges``. When invoking `parseIncrementally` again with
59+
/// the post-edit source and that parse transition, the parser will re-use
60+
/// nodes that haven’t changed.
61+
///
62+
/// - Parameters:
63+
/// - source: The source code to parse
64+
/// - parseTransition: If a similar source file has already been parsed, the
65+
/// ``IncrementalParseTransition`` that contains the previous tree as well
66+
/// as the edits that were performed to it.
67+
/// - Returns: The parsed tree as well as the ``LookaheadRanges`` that describe
68+
/// how far the parser looked ahead while parsing a node, which is
69+
/// necessary to construct an ``IncrementalParseTransition`` for a
70+
/// subsequent incremental parse
71+
public static func parseIncrementally(
72+
source: String,
73+
parseTransition: IncrementalParseTransition?
74+
) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) {
75+
var parser = Parser(source, parseTransition: parseTransition)
76+
return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges)
77+
}
78+
"""
79+
)
80+
81+
DeclSyntax(
82+
"""
83+
/// Parse the source code in the given buffer as Swift source file with support
84+
/// for incremental parsing.
85+
///
86+
/// See doc comments above
87+
public static func parseIncrementally(
88+
source: UnsafeBufferPointer<UInt8>,
89+
maximumNestingLevel: Int? = nil,
90+
parseTransition: IncrementalParseTransition?
91+
) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) {
92+
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel, parseTransition: parseTransition)
93+
return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges)
94+
}
95+
"""
96+
)
5097
}
5198

5299
DeclSyntax(

Sources/SwiftParser/IncrementalParseTransition.swift

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,25 @@
2020
/// This is also used for testing purposes to ensure incremental reparsing
2121
/// worked as expected.
2222
public protocol IncrementalParseReusedNodeDelegate {
23-
/// Accepts the range and ``Syntax`` node of skipped source region.
23+
/// Accepts the ``Syntax`` node of the skipped source region.
2424
///
2525
/// - Parameters:
26-
/// - range: The source region of the currently parsed source.
2726
/// - previousNode: The node from the previous tree that is associated with
2827
/// the skipped source region.
29-
func parserReusedNode(range: ByteSourceRange, previousNode: Syntax)
28+
func parserReusedNode(previousNode: Syntax)
3029
}
3130

3231
/// An implementation of `IncrementalParseReusedNodeDelegate` that just collects
3332
/// the re-used nodes into an array.
3433
public final class IncrementalParseReusedNodeCollector:
3534
IncrementalParseReusedNodeDelegate
3635
{
37-
public var rangeAndNodes: [(ByteSourceRange, Syntax)] = []
36+
public var nodes: [Syntax] = []
3837

3938
public init() {}
4039

41-
public func parserReusedNode(range: ByteSourceRange, previousNode: Syntax) {
42-
rangeAndNodes.append((range, previousNode))
40+
public func parserReusedNode(previousNode: Syntax) {
41+
nodes.append(previousNode)
4342
}
4443
}
4544

@@ -48,6 +47,7 @@ public final class IncrementalParseReusedNodeCollector:
4847
public final class IncrementalParseTransition {
4948
fileprivate let previousTree: SourceFileSyntax
5049
fileprivate let edits: ConcurrentEdits
50+
fileprivate let lookaheadRanges: LookaheadRanges
5151
fileprivate let reusedDelegate: IncrementalParseReusedNodeDelegate?
5252

5353
/// - Parameters:
@@ -59,10 +59,12 @@ public final class IncrementalParseTransition {
5959
public init(
6060
previousTree: SourceFileSyntax,
6161
edits: ConcurrentEdits,
62+
lookaheadRanges: LookaheadRanges,
6263
reusedNodeDelegate: IncrementalParseReusedNodeDelegate? = nil
6364
) {
6465
self.previousTree = previousTree
6566
self.edits = edits
67+
self.lookaheadRanges = lookaheadRanges
6668
self.reusedDelegate = reusedNodeDelegate
6769
}
6870
}
@@ -109,7 +111,6 @@ public struct IncrementalParseLookup {
109111
let node = cursorLookup(prevPosition: prevPosition, kind: kind)
110112
if let delegate = reusedDelegate, let node {
111113
delegate.parserReusedNode(
112-
range: ByteSourceRange(offset: newOffset, length: node.byteSize),
113114
previousNode: node
114115
)
115116
}
@@ -148,24 +149,13 @@ public struct IncrementalParseLookup {
148149
return true
149150
}
150151

151-
// Node can also not be reused if an edit has been made in the next token's
152-
// text, e.g. because `private struct Foo {}` parses as a CodeBlockItem with
153-
// a StructDecl inside and `private struc Foo {}` parses as two
154-
// CodeBlockItems one for `private` and one for `struc Foo {}`
155-
var nextLeafNodeLength: SourceLength = .zero
156-
if let nextSibling = cursor.nextSibling {
157-
// Fast path check: if next sibling is before all the edits then we can
158-
// re-use the node.
159-
if !edits.edits.isEmpty && edits.edits.first!.range.offset > nextSibling.endPosition.utf8Offset {
160-
return true
161-
}
162-
if let nextToken = nextSibling.firstToken(viewMode: .sourceAccurate) {
163-
nextLeafNodeLength = nextToken.leadingTriviaLength + nextToken.contentLength
164-
}
152+
guard let nodeAffectRangeLength = transition.lookaheadRanges.lookaheadRanges[node.raw.id] else {
153+
return false
165154
}
155+
166156
let nodeAffectRange = ByteSourceRange(
167157
offset: node.position.utf8Offset,
168-
length: (node.totalLength + nextLeafNodeLength).utf8Length
158+
length: nodeAffectRangeLength
169159
)
170160

171161
for edit in edits.edits {

Sources/SwiftParser/Lexer/LexemeSequence.swift

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,24 +32,60 @@ extension Lexer {
3232
/// usually less than 0.1% of the memory allocated by the syntax arena.
3333
var lexerStateAllocator = BumpPtrAllocator(slabSize: 256)
3434

35-
fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor) {
35+
/// The offset of the end of the trailing trivia of `nextToken`, relative to the source buffer’s start.
36+
var offsetToNextTokenEnd: Int {
37+
self.getOffsetToStart(self.nextToken) + self.nextToken.byteLength
38+
}
39+
40+
/// See doc comments in ``LookaheadTracker``
41+
///
42+
/// This is an `UnsafeMutablePointer` for two reasons
43+
/// - When `LexemeSequence` gets copied (e.g. when a ``Lookahead`` gets created), it should still reference the same ``LookaheadTracker`` so that any lookahead performed in the ``Lookahead`` also affects the original ``Parser``. It thus needs to be a reference type
44+
/// - ``LookaheadTracker`` is not a class to avoid reference counting it. The ``Parser`` that creates the ``LexemeSequence`` will always outlive any ``Lookahead`` created for it.
45+
let lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>
46+
47+
fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor, lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>) {
3648
self.sourceBufferStart = sourceBufferStart
3749
self.cursor = cursor
3850
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
51+
self.lookaheadTracker = lookaheadTracker
3952
}
4053

4154
@_spi(Testing)
4255
public mutating func next() -> Lexer.Lexeme? {
4356
return self.advance()
4457
}
4558

59+
/// Record the offset of the end of `nextToken` as the furthest offset in ``LookaheadTracker``
60+
func recordNextTokenInLookaheadTracker() {
61+
self.lookaheadTracker.pointee.recordFurthestOffset(self.offsetToNextTokenEnd)
62+
}
63+
4664
mutating func advance() -> Lexer.Lexeme {
4765
defer {
4866
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
4967
}
68+
self.recordNextTokenInLookaheadTracker()
5069
return self.nextToken
5170
}
5271

72+
/// Get the offset of the leading trivia start of `token` relative to `sourceBufferStart`.
73+
func getOffsetToStart(_ token: Lexer.Lexeme) -> Int {
74+
return self.sourceBufferStart.distance(to: token.cursor)
75+
}
76+
77+
/// Advance the cursor by `offset` and reset `currentToken`
78+
///
79+
/// - Important: This should only be used for incremental parsing.
80+
mutating func advance(by offset: Int, currentToken: inout Lexer.Lexeme) {
81+
self.cursor = currentToken.cursor
82+
self.cursor.position = self.cursor.position.advanced(by: offset)
83+
84+
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
85+
86+
currentToken = self.advance()
87+
}
88+
5389
/// Reset the lexeme sequence to the state we were in when lexing `splitToken`
5490
/// but after we consumed `consumedPrefix` bytes from `splitToken`.
5591
/// - Warning: Do not add more usages of this function.
@@ -63,6 +99,7 @@ extension Lexer {
6399
}
64100

65101
func peek() -> Lexer.Lexeme {
102+
self.recordNextTokenInLookaheadTracker()
66103
return self.nextToken
67104
}
68105

@@ -104,12 +141,13 @@ extension Lexer {
104141
@_spi(Testing)
105142
public static func tokenize(
106143
_ input: UnsafeBufferPointer<UInt8>,
107-
from startIndex: Int = 0
144+
from startIndex: Int = 0,
145+
lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>
108146
) -> LexemeSequence {
109147
precondition(input.isEmpty || startIndex < input.endIndex)
110148
let startChar = startIndex == input.startIndex ? UInt8(ascii: "\0") : input[startIndex - 1]
111149
let start = Cursor(input: input, previous: UInt8(ascii: "\0"))
112150
let cursor = Cursor(input: UnsafeBufferPointer(rebasing: input[startIndex...]), previous: startChar)
113-
return LexemeSequence(sourceBufferStart: start, cursor: cursor)
151+
return LexemeSequence(sourceBufferStart: start, cursor: cursor, lookaheadTracker: lookaheadTracker)
114152
}
115153
}

Sources/SwiftParser/Lookahead.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ extension Parser {
3333
) {
3434
self.lexemes = lexemes
3535
self.currentToken = currentToken
36+
self.lexemes.recordNextTokenInLookaheadTracker()
3637
}
3738

3839
fileprivate init(cloning other: Parser) {
@@ -90,6 +91,7 @@ extension Parser.Lookahead {
9091
mutating func consumeAnyToken() {
9192
tokensConsumed += 1
9293
self.currentToken = self.lexemes.advance()
94+
self.lexemes.recordNextTokenInLookaheadTracker()
9395
}
9496

9597
mutating func consumeAnyToken(remapping: RawTokenKind) {

0 commit comments

Comments
 (0)