Skip to content

Commit fdcd570

Browse files
committed
Port incremental parse to CodeBlockItem
Use `LookaheadTracker` to track the furthest offset reached while parsing, and use `LookaheadRanges` to record, for each node, the length of the source range that might affect how that node is parsed. This information is then used to decide whether a node can be reused.
1 parent 1555299 commit fdcd570

File tree

12 files changed

+380
-62
lines changed

12 files changed

+380
-62
lines changed

CodeGeneration/Sources/generate-swiftsyntax/templates/swiftparser/ParserEntryFile.swift

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
2424
/// Parse the source code in the given string as Swift source file. See
2525
/// `Parser.init` for more details.
2626
public static func parse(
27-
source: String,
28-
parseTransition: IncrementalParseTransition? = nil
27+
source: String
2928
) -> SourceFileSyntax {
3029
var parser = Parser(source)
3130
return SourceFileSyntax.parse(from: &parser)
@@ -35,18 +34,66 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
3534

3635
DeclSyntax(
3736
"""
38-
/// Parse the source code in the given string as Swift source file. See
37+
/// Parse the source code in the given buffer as Swift source file. See
3938
/// `Parser.init` for more details.
4039
public static func parse(
4140
source: UnsafeBufferPointer<UInt8>,
42-
maximumNestingLevel: Int? = nil,
43-
parseTransition: IncrementalParseTransition? = nil
41+
maximumNestingLevel: Int? = nil
4442
) -> SourceFileSyntax {
4543
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel)
4644
return SourceFileSyntax.parse(from: &parser)
4745
}
4846
"""
4947
)
48+
49+
DeclSyntax(
50+
"""
51+
/// Parse the source code in the given string as Swift source file with support
52+
/// for incremental parsing.
53+
///
54+
/// When parsing a source file for the first time, invoke `parseIncrementally`
55+
/// with `parseTransition: nil`. This returns the initial tree as well as
56+
/// ``LookaheadRanges``. If an edit is made to the source file, an
57+
/// ``IncrementalParseTransition`` can be constructed from the initial tree
58+
/// and its ``LookaheadRanges``. When invoking `parseIncrementally` again with
59+
/// the post-edit source and that parse transition, the parser will re-use
60+
/// nodes that haven’t changed.
61+
///
62+
/// - Parameters:
63+
/// - source: The source code to parse
64+
/// - parseTransition: If a similar source file has already been parsed, the
65+
/// ``IncrementalParseTransition`` that contains the previous tree as well
66+
/// as the edits that were performed to it.
67+
/// - Returns: The parsed tree as well as the ``LookaheadRanges`` that describe
68+
/// how far the parser looked ahead while parsing a node, which is
69+
/// necessary to construct an ``IncrementalParseTransition`` for a
70+
/// subsequent incremental parse
71+
public static func parseIncrementally(
72+
source: String,
73+
parseTransition: IncrementalParseTransition?
74+
) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) {
75+
var parser = Parser(source, parseTransition: parseTransition)
76+
return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges)
77+
}
78+
"""
79+
)
80+
81+
DeclSyntax(
82+
"""
83+
/// Parse the source code in the given buffer as Swift source file with support
84+
/// for incremental parsing.
85+
///
86+
/// See doc comments in ``Parser/parseIncrementally(source:parseTransition:)``
87+
public static func parseIncrementally(
88+
source: UnsafeBufferPointer<UInt8>,
89+
maximumNestingLevel: Int? = nil,
90+
parseTransition: IncrementalParseTransition?
91+
) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) {
92+
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel, parseTransition: parseTransition)
93+
return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges)
94+
}
95+
"""
96+
)
5097
}
5198

5299
DeclSyntax(

Sources/SwiftParser/IncrementalParseTransition.swift

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,30 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
@_spi(RawSyntax) import SwiftSyntax
14+
15+
extension Parser {
16+
mutating func loadCurrentSyntaxNodeFromCache(for kind: SyntaxKind) -> Syntax? {
17+
guard parseLookup != nil else {
18+
return nil
19+
}
20+
21+
let currentOffset = self.lexemes.getOffsetToStart(self.currentToken)
22+
if let node = parseLookup!.lookUp(currentOffset, kind: kind) {
23+
self.lexemes.advance(by: node.byteSize, currentToken: &self.currentToken)
24+
return node
25+
}
26+
27+
return nil
28+
}
29+
30+
mutating func registerNodeForIncrementalParse(node: RawSyntax, startToken: Lexer.Lexeme) {
31+
lookaheadRanges.registerNodeForIncrementalParse(
32+
node: node,
33+
lookaheadLength: lexemes.lookaheadTracker.pointee.furthestOffset - self.lexemes.getOffsetToStart(startToken)
34+
)
35+
}
36+
}
37+
1438
/// Accepts the re-used ``Syntax`` nodes that `IncrementalParseTransition`
1539
/// determined should be re-used for a parse invocation.
1640
///
@@ -20,26 +44,25 @@
2044
/// This is also used for testing purposes to ensure incremental reparsing
2145
/// worked as expected.
2246
public protocol IncrementalParseReusedNodeDelegate {
23-
/// Accepts the range and ``Syntax`` node of skipped source region.
47+
/// Accepts the ``Syntax`` node of the skipped source region.
2448
///
2549
/// - Parameters:
26-
/// - range: The source region of the currently parsed source.
2750
/// - previousNode: The node from the previous tree that is associated with
2851
/// the skipped source region.
29-
func parserReusedNode(range: ByteSourceRange, previousNode: Syntax)
52+
func parserReusedNode(previousNode: Syntax)
3053
}
3154

3255
/// An implementation of `IncrementalParseReusedNodeDelegate` that just collects
3356
/// the re-used nodes into an array.
3457
public final class IncrementalParseReusedNodeCollector:
3558
IncrementalParseReusedNodeDelegate
3659
{
37-
public var rangeAndNodes: [(ByteSourceRange, Syntax)] = []
60+
public var nodes: [Syntax] = []
3861

3962
public init() {}
4063

41-
public func parserReusedNode(range: ByteSourceRange, previousNode: Syntax) {
42-
rangeAndNodes.append((range, previousNode))
64+
public func parserReusedNode(previousNode: Syntax) {
65+
nodes.append(previousNode)
4366
}
4467
}
4568

@@ -48,6 +71,7 @@ public final class IncrementalParseReusedNodeCollector:
4871
public final class IncrementalParseTransition {
4972
fileprivate let previousTree: SourceFileSyntax
5073
fileprivate let edits: ConcurrentEdits
74+
fileprivate let lookaheadRanges: LookaheadRanges
5175
fileprivate let reusedDelegate: IncrementalParseReusedNodeDelegate?
5276

5377
/// - Parameters:
@@ -59,17 +83,19 @@ public final class IncrementalParseTransition {
5983
public init(
6084
previousTree: SourceFileSyntax,
6185
edits: ConcurrentEdits,
86+
lookaheadRanges: LookaheadRanges,
6287
reusedNodeDelegate: IncrementalParseReusedNodeDelegate? = nil
6388
) {
6489
self.previousTree = previousTree
6590
self.edits = edits
91+
self.lookaheadRanges = lookaheadRanges
6692
self.reusedDelegate = reusedNodeDelegate
6793
}
6894
}
6995

7096
/// Provides a mechanism for the parser to skip regions of an incrementally
7197
/// updated source that was already parsed during a previous parse invocation.
72-
public struct IncrementalParseLookup {
98+
struct IncrementalParseLookup {
7399
fileprivate let transition: IncrementalParseTransition
74100
fileprivate var cursor: SyntaxCursor
75101

@@ -100,23 +126,21 @@ public struct IncrementalParseLookup {
100126
/// - Returns: A ``Syntax`` node from the previous parse invocation,
101127
/// representing the contents of this region, if it is still valid
102128
/// to re-use. `nil` otherwise.
103-
@_spi(RawSyntax)
104-
public mutating func lookUp(_ newOffset: Int, kind: SyntaxKind) -> Syntax? {
129+
fileprivate mutating func lookUp(_ newOffset: Int, kind: SyntaxKind) -> Syntax? {
105130
guard let prevOffset = translateToPreEditOffset(newOffset) else {
106131
return nil
107132
}
108133
let prevPosition = AbsolutePosition(utf8Offset: prevOffset)
109134
let node = cursorLookup(prevPosition: prevPosition, kind: kind)
110135
if let delegate = reusedDelegate, let node {
111136
delegate.parserReusedNode(
112-
range: ByteSourceRange(offset: newOffset, length: node.byteSize),
113137
previousNode: node
114138
)
115139
}
116140
return node
117141
}
118142

119-
mutating fileprivate func cursorLookup(
143+
fileprivate mutating func cursorLookup(
120144
prevPosition: AbsolutePosition,
121145
kind: SyntaxKind
122146
) -> Syntax? {
@@ -148,24 +172,13 @@ public struct IncrementalParseLookup {
148172
return true
149173
}
150174

151-
// Node can also not be reused if an edit has been made in the next token's
152-
// text, e.g. because `private struct Foo {}` parses as a CodeBlockItem with
153-
// a StructDecl inside and `private struc Foo {}` parses as two
154-
// CodeBlockItems one for `private` and one for `struc Foo {}`
155-
var nextLeafNodeLength: SourceLength = .zero
156-
if let nextSibling = cursor.nextSibling {
157-
// Fast path check: if next sibling is before all the edits then we can
158-
// re-use the node.
159-
if !edits.edits.isEmpty && edits.edits.first!.range.offset > nextSibling.endPosition.utf8Offset {
160-
return true
161-
}
162-
if let nextToken = nextSibling.firstToken(viewMode: .sourceAccurate) {
163-
nextLeafNodeLength = nextToken.leadingTriviaLength + nextToken.contentLength
164-
}
175+
guard let nodeAffectRangeLength = transition.lookaheadRanges.lookaheadRanges[node.raw.id] else {
176+
return false
165177
}
178+
166179
let nodeAffectRange = ByteSourceRange(
167180
offset: node.position.utf8Offset,
168-
length: (node.totalLength + nextLeafNodeLength).utf8Length
181+
length: nodeAffectRangeLength
169182
)
170183

171184
for edit in edits.edits {

Sources/SwiftParser/Lexer/LexemeSequence.swift

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,24 +32,60 @@ extension Lexer {
3232
/// usually less than 0.1% of the memory allocated by the syntax arena.
3333
var lexerStateAllocator = BumpPtrAllocator(slabSize: 256)
3434

35-
fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor) {
35+
/// The offset of the trailing trivia end of `nextToken` relative to the source buffer’s start.
36+
var offsetToNextTokenEnd: Int {
37+
self.getOffsetToStart(self.nextToken) + self.nextToken.byteLength
38+
}
39+
40+
/// See doc comments in ``LookaheadTracker``
41+
///
42+
/// This is an `UnsafeMutablePointer` for two reasons
43+
/// - When `LexemeSequence` gets copied (e.g. when a ``Lookahead`` gets created), it should still reference the same ``LookaheadTracker`` so that any lookahead performed in the ``Lookahead`` also affects the original ``Parser``. It thus needs to be a reference type
44+
/// - ``LookaheadTracker`` is not a class to avoid reference counting it. The ``Parser`` that creates the ``LexemeSequence`` will always outlive any ``Lookahead`` created for it.
45+
let lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>
46+
47+
fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor, lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>) {
3648
self.sourceBufferStart = sourceBufferStart
3749
self.cursor = cursor
3850
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
51+
self.lookaheadTracker = lookaheadTracker
3952
}
4053

4154
@_spi(Testing)
4255
public mutating func next() -> Lexer.Lexeme? {
4356
return self.advance()
4457
}
4558

59+
/// Record the offset of the end of `nextToken` as the furthest offset in ``LookaheadTracker``
60+
func recordNextTokenInLookaheadTracker() {
61+
self.lookaheadTracker.pointee.recordFurthestOffset(self.offsetToNextTokenEnd)
62+
}
63+
4664
mutating func advance() -> Lexer.Lexeme {
4765
defer {
4866
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
4967
}
68+
self.recordNextTokenInLookaheadTracker()
5069
return self.nextToken
5170
}
5271

72+
/// Get the offset of the leading trivia start of `token` relative to `sourceBufferStart`.
73+
func getOffsetToStart(_ token: Lexer.Lexeme) -> Int {
74+
return self.sourceBufferStart.distance(to: token.cursor)
75+
}
76+
77+
/// Advance the cursor by `offset` and reset `currentToken`
78+
///
79+
/// - Important: This should only be used for incremental parsing.
80+
mutating func advance(by offset: Int, currentToken: inout Lexer.Lexeme) {
81+
self.cursor = currentToken.cursor
82+
self.cursor.position = self.cursor.position.advanced(by: offset)
83+
84+
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
85+
86+
currentToken = self.advance()
87+
}
88+
5389
/// Reset the lexeme sequence to the state we were in when lexing `splitToken`
5490
/// but after we consumed `consumedPrefix` bytes from `splitToken`.
5591
/// - Warning: Do not add more usages of this function.
@@ -63,6 +99,7 @@ extension Lexer {
6399
}
64100

65101
func peek() -> Lexer.Lexeme {
102+
self.recordNextTokenInLookaheadTracker()
66103
return self.nextToken
67104
}
68105

@@ -104,12 +141,13 @@ extension Lexer {
104141
@_spi(Testing)
105142
public static func tokenize(
106143
_ input: UnsafeBufferPointer<UInt8>,
107-
from startIndex: Int = 0
144+
from startIndex: Int = 0,
145+
lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>
108146
) -> LexemeSequence {
109147
precondition(input.isEmpty || startIndex < input.endIndex)
110148
let startChar = startIndex == input.startIndex ? UInt8(ascii: "\0") : input[startIndex - 1]
111149
let start = Cursor(input: input, previous: UInt8(ascii: "\0"))
112150
let cursor = Cursor(input: UnsafeBufferPointer(rebasing: input[startIndex...]), previous: startChar)
113-
return LexemeSequence(sourceBufferStart: start, cursor: cursor)
151+
return LexemeSequence(sourceBufferStart: start, cursor: cursor, lookaheadTracker: lookaheadTracker)
114152
}
115153
}

0 commit comments

Comments
 (0)