Skip to content

Commit 82c73e5

Browse files
committed
Port incremental parse to CodeBlockItem
Use `LookaheadTracker` to track the furthest offset the parser has reached, and use `IncrementalParseNodeAffectRangeCollector` to record, for each node, the source range that might affect how that node is parsed. That information is then used to decide whether a node can be reused.
1 parent 1555299 commit 82c73e5

File tree

11 files changed

+298
-51
lines changed

11 files changed

+298
-51
lines changed

CodeGeneration/Sources/generate-swiftsyntax/templates/swiftparser/ParserEntryFile.swift

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
2424
/// Parse the source code in the given string as Swift source file. See
2525
/// `Parser.init` for more details.
2626
public static func parse(
27-
source: String,
28-
parseTransition: IncrementalParseTransition? = nil
27+
source: String
2928
) -> SourceFileSyntax {
3029
var parser = Parser(source)
3130
return SourceFileSyntax.parse(from: &parser)
@@ -35,18 +34,44 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
3534

3635
DeclSyntax(
3736
"""
38-
/// Parse the source code in the given string as Swift source file. See
37+
/// Parse the source code in the given buffer as Swift source file. See
3938
/// `Parser.init` for more details.
4039
public static func parse(
4140
source: UnsafeBufferPointer<UInt8>,
42-
maximumNestingLevel: Int? = nil,
43-
parseTransition: IncrementalParseTransition? = nil
41+
maximumNestingLevel: Int? = nil
4442
) -> SourceFileSyntax {
4543
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel)
4644
return SourceFileSyntax.parse(from: &parser)
4745
}
4846
"""
4947
)
48+
49+
DeclSyntax(
50+
"""
51+
/// Parse the source code in the given string as Swift source file while return `nodeAffectRangeCollector` to enable incremental parse.
52+
public static func parse(
53+
source: String,
54+
parseTransition: IncrementalParseTransition? = nil
55+
) -> (tree: SourceFileSyntax, nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector) {
56+
var parser = Parser(source, parseTransition: parseTransition)
57+
return (SourceFileSyntax.parse(from: &parser), parser.parseNodeAffectRange)
58+
}
59+
"""
60+
)
61+
62+
DeclSyntax(
63+
"""
64+
/// parse the source code in the given buffer as Swift source file while return `nodeAffectRangeCollector` to enable incremental parse.
65+
public static func parse(
66+
source: UnsafeBufferPointer<UInt8>,
67+
maximumNestingLevel: Int? = nil,
68+
parseTransition: IncrementalParseTransition? = nil
69+
) -> (tree: SourceFileSyntax, nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector) {
70+
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel, parseTransition: parseTransition)
71+
return (SourceFileSyntax.parse(from: &parser), parser.parseNodeAffectRange)
72+
}
73+
"""
74+
)
5075
}
5176

5277
DeclSyntax(

Sources/SwiftParser/IncrementalParseTransition.swift

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,39 @@
2020
/// This is also used for testing purposes to ensure incremental reparsing
2121
/// worked as expected.
2222
public protocol IncrementalParseReusedNodeDelegate {
23-
/// Accepts the range and ``Syntax`` node of skipped source region.
23+
/// Accepts ``Syntax`` node of skipped source region.
2424
///
2525
/// - Parameters:
26-
/// - range: The source region of the currently parsed source.
2726
/// - previousNode: The node from the previous tree that is associated with
2827
/// the skipped source region.
29-
func parserReusedNode(range: ByteSourceRange, previousNode: Syntax)
28+
func parserReusedNode(previousNode: Syntax)
3029
}
3130

3231
/// An implementation of `IncrementalParseReusedNodeDelegate` that just collects
3332
/// the re-used nodes into an array.
3433
public final class IncrementalParseReusedNodeCollector:
3534
IncrementalParseReusedNodeDelegate
3635
{
37-
public var rangeAndNodes: [(ByteSourceRange, Syntax)] = []
36+
public var nodes: [Syntax] = []
3837

3938
public init() {}
4039

41-
public func parserReusedNode(range: ByteSourceRange, previousNode: Syntax) {
42-
rangeAndNodes.append((range, previousNode))
40+
public func parserReusedNode(previousNode: Syntax) {
41+
nodes.append(previousNode)
42+
}
43+
}
44+
45+
/// Records the affect range of nodes that may potentially be re-used. When an edit intersects a node's affect range, the node cannot be re-used.
46+
public struct IncrementalParseNodeAffectRangeCollector {
47+
/// Maps a node's ID to the UTF-8 length of the source range, starting at the node's position, that might affect the parse of that node.
48+
/// This information is used to determine whether a node can be reused
49+
fileprivate var nodeAffectRange: [RawSyntax.ID: Int] = [:]
50+
51+
public init() {}
52+
53+
@_spi(RawSyntax)
54+
public mutating func registerNodeForIncrementalParse(node: RawSyntax, length: Int) {
55+
self.nodeAffectRange[node.id] = length
4356
}
4457
}
4558

@@ -48,6 +61,7 @@ public final class IncrementalParseReusedNodeCollector:
4861
public final class IncrementalParseTransition {
4962
fileprivate let previousTree: SourceFileSyntax
5063
fileprivate let edits: ConcurrentEdits
64+
fileprivate let nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector
5165
fileprivate let reusedDelegate: IncrementalParseReusedNodeDelegate?
5266

5367
/// - Parameters:
@@ -59,10 +73,12 @@ public final class IncrementalParseTransition {
5973
public init(
6074
previousTree: SourceFileSyntax,
6175
edits: ConcurrentEdits,
76+
nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector,
6277
reusedNodeDelegate: IncrementalParseReusedNodeDelegate? = nil
6378
) {
6479
self.previousTree = previousTree
6580
self.edits = edits
81+
self.nodeAffectRangeCollector = nodeAffectRangeCollector
6682
self.reusedDelegate = reusedNodeDelegate
6783
}
6884
}
@@ -73,6 +89,10 @@ public struct IncrementalParseLookup {
7389
fileprivate let transition: IncrementalParseTransition
7490
fileprivate var cursor: SyntaxCursor
7591

92+
fileprivate var nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector {
93+
return transition.nodeAffectRangeCollector
94+
}
95+
7696
/// Create a new ``IncrementalParseLookup`` that can look nodes up based on the
7797
/// given ``IncrementalParseTransition``.
7898
public init(transition: IncrementalParseTransition) {
@@ -109,7 +129,6 @@ public struct IncrementalParseLookup {
109129
let node = cursorLookup(prevPosition: prevPosition, kind: kind)
110130
if let delegate = reusedDelegate, let node {
111131
delegate.parserReusedNode(
112-
range: ByteSourceRange(offset: newOffset, length: node.byteSize),
113132
previousNode: node
114133
)
115134
}
@@ -148,24 +167,13 @@ public struct IncrementalParseLookup {
148167
return true
149168
}
150169

151-
// Node can also not be reused if an edit has been made in the next token's
152-
// text, e.g. because `private struct Foo {}` parses as a CodeBlockItem with
153-
// a StructDecl inside and `private struc Foo {}` parses as two
154-
// CodeBlockItems one for `private` and one for `struc Foo {}`
155-
var nextLeafNodeLength: SourceLength = .zero
156-
if let nextSibling = cursor.nextSibling {
157-
// Fast path check: if next sibling is before all the edits then we can
158-
// re-use the node.
159-
if !edits.edits.isEmpty && edits.edits.first!.range.offset > nextSibling.endPosition.utf8Offset {
160-
return true
161-
}
162-
if let nextToken = nextSibling.firstToken(viewMode: .sourceAccurate) {
163-
nextLeafNodeLength = nextToken.leadingTriviaLength + nextToken.contentLength
164-
}
170+
guard let nodeAffectRangeLength = nodeAffectRangeCollector.nodeAffectRange[node.raw.id] else {
171+
return false
165172
}
173+
166174
let nodeAffectRange = ByteSourceRange(
167175
offset: node.position.utf8Offset,
168-
length: (node.totalLength + nextLeafNodeLength).utf8Length
176+
length: nodeAffectRangeLength
169177
)
170178

171179
for edit in edits.edits {

Sources/SwiftParser/Lexer/LexemeSequence.swift

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,24 +32,54 @@ extension Lexer {
3232
/// usually less than 0.1% of the memory allocated by the syntax arena.
3333
var lexerStateAllocator = BumpPtrAllocator(slabSize: 256)
3434

35+
/// The offset from `sourceBufferStart` to the end of the next token.
36+
var offsetToNextTokenEnd: Int {
37+
self.getOffsetToStart(self.nextToken) + self.nextToken.byteLength
38+
}
39+
40+
/// See doc comments in ``LookaheadTracker``
41+
var lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>
42+
3543
fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor) {
3644
self.sourceBufferStart = sourceBufferStart
3745
self.cursor = cursor
3846
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
47+
self.lookaheadTracker = .allocate(capacity: 1)
48+
self.lookaheadTracker.initialize(to: LookaheadTracker())
3949
}
4050

4151
@_spi(Testing)
4252
public mutating func next() -> Lexer.Lexeme? {
4353
return self.advance()
4454
}
4555

56+
/// Record the offset of the end of next token as the furthest offset in ``LookaheadTracker``
57+
func recordFurthestOffset() {
58+
self.lookaheadTracker.pointee.recordFurthestOffset(self.offsetToNextTokenEnd)
59+
}
60+
4661
mutating func advance() -> Lexer.Lexeme {
4762
defer {
4863
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
4964
}
5065
return self.nextToken
5166
}
5267

68+
/// Get the offset of `token` from `sourceBufferStart`.
69+
func getOffsetToStart(_ token: Lexer.Lexeme) -> Int {
70+
return self.sourceBufferStart.distance(to: token.cursor)
71+
}
72+
73+
/// Move the cursor `offset` bytes past the start of `currentToken`, re-lex from there, and reset `currentToken`
74+
mutating func advance(by offset: Int, currentToken: inout Lexer.Lexeme) {
75+
self.cursor = currentToken.cursor
76+
self.cursor.position = self.cursor.position.advanced(by: offset)
77+
78+
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
79+
80+
currentToken = self.advance()
81+
}
82+
5383
/// Reset the lexeme sequence to the state we were in when lexing `splitToken`
5484
/// but after we consumed `consumedPrefix` bytes from `splitToken`.
5585
/// - Warning: Do not add more usages of this function.

Sources/SwiftParser/Lookahead.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ extension Parser {
3333
) {
3434
self.lexemes = lexemes
3535
self.currentToken = currentToken
36+
self.lexemes.recordFurthestOffset()
3637
}
3738

3839
fileprivate init(cloning other: Parser) {
@@ -90,6 +91,7 @@ extension Parser.Lookahead {
9091
mutating func consumeAnyToken() {
9192
tokensConsumed += 1
9293
self.currentToken = self.lexemes.advance()
94+
self.lexemes.recordFurthestOffset()
9395
}
9496

9597
mutating func consumeAnyToken(remapping: RawTokenKind) {

Sources/SwiftParser/Parser.swift

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ public struct Parser {
101101
/// When this nesting level is exceeded, the parser should stop parsing.
102102
let maximumNestingLevel: Int
103103

104+
/// See comments in ``IncrementalParseLookup``
105+
let parseLookup: IncrementalParseLookup?
106+
107+
/// See comments in ``IncrementalParseNodeAffectRangeCollector``
108+
var parseNodeAffectRange = IncrementalParseNodeAffectRangeCollector()
109+
104110
/// A default maximum nesting level that is used if the client didn't
105111
/// explicitly specify one. Debug builds of the parser consume a lot more stack
106112
/// space and thus have a lower default maximum nesting level.
@@ -111,7 +117,11 @@ public struct Parser {
111117
#endif
112118

113119
/// Initializes a ``Parser`` from the given string.
114-
public init(_ input: String, maximumNestingLevel: Int? = nil) {
120+
public init(
121+
_ input: String,
122+
maximumNestingLevel: Int? = nil,
123+
parseTransition: IncrementalParseTransition? = nil
124+
) {
115125
self.maximumNestingLevel = maximumNestingLevel ?? Self.defaultMaximumNestingLevel
116126

117127
self.arena = ParsingSyntaxArena(
@@ -126,6 +136,11 @@ public struct Parser {
126136

127137
self.lexemes = Lexer.tokenize(interned)
128138
self.currentToken = self.lexemes.advance()
139+
if let parseTransition {
140+
self.parseLookup = IncrementalParseLookup(transition: parseTransition)
141+
} else {
142+
self.parseLookup = nil
143+
}
129144
}
130145

131146
/// Initializes a ``Parser`` from the given input buffer.
@@ -142,7 +157,12 @@ public struct Parser {
142157
/// arena is created automatically, and `input` copied into the
143158
/// arena. If non-`nil`, `input` must be within its registered
144159
/// source buffer or allocator.
145-
public init(_ input: UnsafeBufferPointer<UInt8>, maximumNestingLevel: Int? = nil, arena: ParsingSyntaxArena? = nil) {
160+
public init(
161+
_ input: UnsafeBufferPointer<UInt8>,
162+
maximumNestingLevel: Int? = nil,
163+
parseTransition: IncrementalParseTransition? = nil,
164+
arena: ParsingSyntaxArena? = nil
165+
) {
146166
self.maximumNestingLevel = maximumNestingLevel ?? Self.defaultMaximumNestingLevel
147167

148168
var sourceBuffer: UnsafeBufferPointer<UInt8>
@@ -159,6 +179,11 @@ public struct Parser {
159179

160180
self.lexemes = Lexer.tokenize(sourceBuffer)
161181
self.currentToken = self.lexemes.advance()
182+
if let parseTransition {
183+
self.parseLookup = IncrementalParseLookup(transition: parseTransition)
184+
} else {
185+
self.parseLookup = nil
186+
}
162187
}
163188

164189
mutating func missingToken(_ kind: RawTokenKind, text: SyntaxText? = nil) -> RawTokenSyntax {
@@ -237,6 +262,7 @@ public struct Parser {
237262
extension Parser {
238263
/// Retrieves the token following the current token without consuming it.
239264
func peek() -> Lexer.Lexeme {
265+
lexemes.recordFurthestOffset()
240266
return self.lexemes.peek()
241267
}
242268
}
@@ -629,3 +655,41 @@ extension Parser {
629655
)
630656
}
631657
}
658+
659+
// MARK: Incremental Parsing
660+
extension Parser {
661+
mutating func loadCurrentSyntaxNodeFromCache(for kind: SyntaxKind) -> Syntax? {
662+
guard var parseLookup else {
663+
return nil
664+
}
665+
666+
let currentOffset = self.lexemes.getOffsetToStart(self.currentToken)
667+
if let node = parseLookup.lookUp(currentOffset, kind: kind) {
668+
self.lexemes.advance(by: node.byteSize, currentToken: &self.currentToken)
669+
return node
670+
}
671+
672+
return nil
673+
}
674+
675+
mutating func registerNodeForIncrementalParse(node: RawSyntax, startToken: Lexer.Lexeme) {
676+
parseNodeAffectRange.registerNodeForIncrementalParse(
677+
node: node,
678+
length: max(lookaheadFurthestOffset - self.lexemes.getOffsetToStart(startToken), node.byteLength + currentToken.byteLength)
679+
)
680+
}
681+
682+
public var lookaheadFurthestOffset: Int {
683+
return lexemes.lookaheadTracker.pointee.furthestOffset
684+
}
685+
}
686+
687+
/// Records the furthest offset from `sourceBufferStart` that is reached by ``Parser/peek()`` or ``Parser/Lookahead`` in ``Lexer/LexemeSequence``
688+
struct LookaheadTracker {
689+
private(set) var furthestOffset: Int = 0
690+
691+
public mutating func recordFurthestOffset(_ furthestOffset: Int) {
692+
/// Lookahead may run multiple times while finding the different valid parts of a node, so we keep the maximum lookahead offset as the possible affect range of the node.
693+
self.furthestOffset = max(furthestOffset, self.furthestOffset)
694+
}
695+
}

Sources/SwiftParser/TopLevel.swift

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,12 @@ extension Parser {
151151
/// statement → compiler-control-statement
152152
/// statements → statement statements?
153153
mutating func parseCodeBlockItem(isAtTopLevel: Bool, allowInitDecl: Bool) -> RawCodeBlockItemSyntax? {
154+
let startToken = self.currentToken
155+
if let syntax = self.loadCurrentSyntaxNodeFromCache(for: .codeBlockItem) {
156+
self.registerNodeForIncrementalParse(node: syntax.raw, startToken: startToken)
157+
return RawCodeBlockItemSyntax(syntax.raw)
158+
}
159+
154160
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
155161
return RawCodeBlockItemSyntax(
156162
remainingTokens,
@@ -183,12 +189,17 @@ extension Parser {
183189
if item.raw.isEmpty && semi == nil && trailingSemis.isEmpty {
184190
return nil
185191
}
186-
return RawCodeBlockItemSyntax(
192+
193+
let result = RawCodeBlockItemSyntax(
187194
item: item,
188195
semicolon: semi,
189196
RawUnexpectedNodesSyntax(trailingSemis, arena: self.arena),
190197
arena: self.arena
191198
)
199+
200+
self.registerNodeForIncrementalParse(node: result.raw, startToken: startToken)
201+
202+
return result
192203
}
193204

194205
private mutating func parseStatementItem() -> RawCodeBlockItemSyntax.Item {

0 commit comments

Comments
 (0)