Skip to content

Commit accde01

Browse files
committed
WIP: port incremental parse to CodeBlockItem
1 parent 1555299 commit accde01

File tree

10 files changed

+258
-43
lines changed

10 files changed

+258
-43
lines changed

Sources/SwiftParser/IncrementalParseTransition.swift

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,41 @@
2020
/// This is also used for testing purposes to ensure incremental reparsing
2121
/// worked as expected.
2222
public protocol IncrementalParseReusedNodeDelegate {
23-
/// Accepts the range and ``Syntax`` node of skipped source region.
23+
/// Accepts the ``Syntax`` node of the skipped source region.
2424
///
2525
/// - Parameters:
26-
/// - range: The source region of the currently parsed source.
2726
/// - previousNode: The node from the previous tree that is associated with
2827
/// the skipped source region.
29-
func parserReusedNode(range: ByteSourceRange, previousNode: Syntax)
28+
func parserReusedNode(previousNode: Syntax)
3029
}
3130

3231
/// An implementation of `IncrementalParseReusedNodeDelegate` that just collects
3332
/// the re-used nodes into an array.
3433
public final class IncrementalParseReusedNodeCollector:
3534
IncrementalParseReusedNodeDelegate
3635
{
37-
public var rangeAndNodes: [(ByteSourceRange, Syntax)] = []
36+
public var nodes: [Syntax] = []
3837

3938
public init() {}
4039

41-
public func parserReusedNode(range: ByteSourceRange, previousNode: Syntax) {
42-
rangeAndNodes.append((range, previousNode))
40+
public func parserReusedNode(previousNode: Syntax) {
41+
nodes.append(previousNode)
42+
}
43+
}
44+
45+
/// Records the affected range for potentially re-usable nodes. When an edit intersects a node's affected range, the node cannot be re-used.
46+
///
47+
/// This also acts as the trigger that enables the parser to parse incrementally.
48+
public final class IncrementalParseNodeAffectRangeCollector {
49+
/// A dictionary that records, for each node, the UTF-8 length of the source that might affect how the node is parsed.
50+
/// This information is used to determine whether a node can be reused
51+
fileprivate var nodeAffectRange: [RawSyntax.ID: Int] = [:]
52+
53+
public init() {}
54+
55+
@_spi(RawSyntax)
56+
public func registerNodeForIncrementalParse(node: RawSyntax, length: Int) {
57+
self.nodeAffectRange[node.id] = length
4358
}
4459
}
4560

@@ -71,13 +86,17 @@ public final class IncrementalParseTransition {
7186
/// updated source that was already parsed during a previous parse invocation.
7287
public struct IncrementalParseLookup {
7388
fileprivate let transition: IncrementalParseTransition
89+
7490
fileprivate var cursor: SyntaxCursor
7591

92+
fileprivate let nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector
93+
7694
/// Create a new ``IncrementalParseLookup`` that can look nodes up based on the
7795
/// given ``IncrementalParseTransition``.
78-
public init(transition: IncrementalParseTransition) {
96+
public init(transition: IncrementalParseTransition, nodeAffectRangeCollector: IncrementalParseNodeAffectRangeCollector) {
7997
self.transition = transition
8098
self.cursor = .init(root: Syntax(transition.previousTree))
99+
self.nodeAffectRangeCollector = nodeAffectRangeCollector
81100
}
82101

83102
fileprivate var edits: ConcurrentEdits {
@@ -109,7 +128,6 @@ public struct IncrementalParseLookup {
109128
let node = cursorLookup(prevPosition: prevPosition, kind: kind)
110129
if let delegate = reusedDelegate, let node {
111130
delegate.parserReusedNode(
112-
range: ByteSourceRange(offset: newOffset, length: node.byteSize),
113131
previousNode: node
114132
)
115133
}
@@ -148,24 +166,13 @@ public struct IncrementalParseLookup {
148166
return true
149167
}
150168

151-
// Node can also not be reused if an edit has been made in the next token's
152-
// text, e.g. because `private struct Foo {}` parses as a CodeBlockItem with
153-
// a StructDecl inside and `private struc Foo {}` parses as two
154-
// CodeBlockItems one for `private` and one for `struc Foo {}`
155-
var nextLeafNodeLength: SourceLength = .zero
156-
if let nextSibling = cursor.nextSibling {
157-
// Fast path check: if next sibling is before all the edits then we can
158-
// re-use the node.
159-
if !edits.edits.isEmpty && edits.edits.first!.range.offset > nextSibling.endPosition.utf8Offset {
160-
return true
161-
}
162-
if let nextToken = nextSibling.firstToken(viewMode: .sourceAccurate) {
163-
nextLeafNodeLength = nextToken.leadingTriviaLength + nextToken.contentLength
164-
}
169+
guard let nodeAffectRangeLength = nodeAffectRangeCollector.nodeAffectRange[node.raw.id] else {
170+
return false
165171
}
172+
166173
let nodeAffectRange = ByteSourceRange(
167174
offset: node.position.utf8Offset,
168-
length: (node.totalLength + nextLeafNodeLength).utf8Length
175+
length: nodeAffectRangeLength
169176
)
170177

171178
for edit in edits.edits {

Sources/SwiftParser/Lexer/LexemeSequence.swift

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,24 +32,53 @@ extension Lexer {
3232
/// usually less than 0.1% of the memory allocated by the syntax arena.
3333
var lexerStateAllocator = BumpPtrAllocator(slabSize: 256)
3434

35+
/// The offset of the end of the next token, relative to `sourceBufferStart`.
36+
var offsetToNextTokenEnd: Int {
37+
self.getOffsetToStart(self.nextToken) + self.nextToken.byteLength
38+
}
39+
40+
/// See doc comments in ``LookaheadTracker``
41+
var lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>
42+
3543
fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor) {
3644
self.sourceBufferStart = sourceBufferStart
3745
self.cursor = cursor
3846
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
47+
self.lookaheadTracker = .allocate(capacity: 1)
48+
self.lookaheadTracker.initialize(to: LookaheadTracker())
3949
}
4050

4151
@_spi(Testing)
4252
public mutating func next() -> Lexer.Lexeme? {
4353
return self.advance()
4454
}
4555

56+
func recordFurthestOffset() {
57+
self.lookaheadTracker.pointee.recordFurthestOffset(self.offsetToNextTokenEnd)
58+
}
59+
4660
mutating func advance() -> Lexer.Lexeme {
4761
defer {
4862
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
4963
}
5064
return self.nextToken
5165
}
5266

67+
/// Returns the offset of `token` relative to `sourceBufferStart`.
68+
func getOffsetToStart(_ token: Lexer.Lexeme) -> Int {
69+
return self.sourceBufferStart.distance(to: token.cursor)
70+
}
71+
72+
/// Advance the cursor by `offset` and reset `currentToken`.
73+
mutating func advance(by offset: Int, currentToken: inout Lexer.Lexeme) {
74+
self.cursor = currentToken.cursor
75+
self.cursor.position = self.cursor.position.advanced(by: offset)
76+
77+
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
78+
79+
currentToken = self.advance()
80+
}
81+
5382
/// Reset the lexeme sequence to the state we were in when lexing `splitToken`
5483
/// but after we consumed `consumedPrefix` bytes from `splitToken`.
5584
/// - Warning: Do not add more usages of this function.

Sources/SwiftParser/Lookahead.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ extension Parser {
3333
) {
3434
self.lexemes = lexemes
3535
self.currentToken = currentToken
36+
self.lexemes.recordFurthestOffset()
3637
}
3738

3839
fileprivate init(cloning other: Parser) {
@@ -90,6 +91,7 @@ extension Parser.Lookahead {
9091
mutating func consumeAnyToken() {
9192
tokensConsumed += 1
9293
self.currentToken = self.lexemes.advance()
94+
self.lexemes.recordFurthestOffset()
9395
}
9496

9597
mutating func consumeAnyToken(remapping: RawTokenKind) {

Sources/SwiftParser/Parser.swift

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,10 @@ public struct Parser {
101101
/// When this nesting level is exceeded, the parser should stop parsing.
102102
let maximumNestingLevel: Int
103103

104+
let parseLookup: IncrementalParseLookup?
105+
106+
let parseNodeAffectRange: IncrementalParseNodeAffectRangeCollector?
107+
104108
/// A default maximum nesting level that is used if the client didn't
105109
/// explicitly specify one. Debug builds of the parser consume a lot more stack
106110
/// space and thus have a lower default maximum nesting level.
@@ -111,7 +115,12 @@ public struct Parser {
111115
#endif
112116

113117
/// Initializes a ``Parser`` from the given string.
114-
public init(_ input: String, maximumNestingLevel: Int? = nil) {
118+
public init(
119+
_ input: String,
120+
maximumNestingLevel: Int? = nil,
121+
parseNodeAffectRange: IncrementalParseNodeAffectRangeCollector? = nil,
122+
parseTransition: IncrementalParseTransition? = nil
123+
) {
115124
self.maximumNestingLevel = maximumNestingLevel ?? Self.defaultMaximumNestingLevel
116125

117126
self.arena = ParsingSyntaxArena(
@@ -126,6 +135,14 @@ public struct Parser {
126135

127136
self.lexemes = Lexer.tokenize(interned)
128137
self.currentToken = self.lexemes.advance()
138+
self.parseNodeAffectRange = parseNodeAffectRange
139+
if let parseTransition,
140+
let parseNodeAffectRange
141+
{
142+
self.parseLookup = IncrementalParseLookup(transition: parseTransition, nodeAffectRangeCollector: parseNodeAffectRange)
143+
} else {
144+
self.parseLookup = nil
145+
}
129146
}
130147

131148
/// Initializes a ``Parser`` from the given input buffer.
@@ -142,7 +159,13 @@ public struct Parser {
142159
/// arena is created automatically, and `input` copied into the
143160
/// arena. If non-`nil`, `input` must be within its registered
144161
/// source buffer or allocator.
145-
public init(_ input: UnsafeBufferPointer<UInt8>, maximumNestingLevel: Int? = nil, arena: ParsingSyntaxArena? = nil) {
162+
public init(
163+
_ input: UnsafeBufferPointer<UInt8>,
164+
maximumNestingLevel: Int? = nil,
165+
parseNodeAffectRange: IncrementalParseNodeAffectRangeCollector? = nil,
166+
parseTransition: IncrementalParseTransition? = nil,
167+
arena: ParsingSyntaxArena? = nil
168+
) {
146169
self.maximumNestingLevel = maximumNestingLevel ?? Self.defaultMaximumNestingLevel
147170

148171
var sourceBuffer: UnsafeBufferPointer<UInt8>
@@ -159,6 +182,14 @@ public struct Parser {
159182

160183
self.lexemes = Lexer.tokenize(sourceBuffer)
161184
self.currentToken = self.lexemes.advance()
185+
self.parseNodeAffectRange = parseNodeAffectRange
186+
if let parseTransition,
187+
let parseNodeAffectRange
188+
{
189+
self.parseLookup = IncrementalParseLookup(transition: parseTransition, nodeAffectRangeCollector: parseNodeAffectRange)
190+
} else {
191+
self.parseLookup = nil
192+
}
162193
}
163194

164195
mutating func missingToken(_ kind: RawTokenKind, text: SyntaxText? = nil) -> RawTokenSyntax {
@@ -237,6 +268,7 @@ public struct Parser {
237268
extension Parser {
238269
/// Retrieves the token following the current token without consuming it.
239270
func peek() -> Lexer.Lexeme {
271+
lexemes.recordFurthestOffset()
240272
return self.lexemes.peek()
241273
}
242274
}
@@ -629,3 +661,44 @@ extension Parser {
629661
)
630662
}
631663
}
664+
665+
// MARK: Incremental Parsing
666+
extension Parser {
667+
mutating func loadCurrentSyntaxNodeFromCache(for kind: SyntaxKind) -> Syntax? {
668+
guard var parseLookup else {
669+
return nil
670+
}
671+
672+
let currentOffset = self.lexemes.getOffsetToStart(self.currentToken)
673+
if let node = parseLookup.lookUp(currentOffset, kind: kind) {
674+
self.lexemes.advance(by: node.byteSize, currentToken: &self.currentToken)
675+
return node
676+
}
677+
678+
return nil
679+
}
680+
681+
func registerNodeForIncrementalParse(node: RawSyntax, startToken: Lexer.Lexeme) {
682+
guard let parseNodeAffectRange else {
683+
return
684+
}
685+
parseNodeAffectRange.registerNodeForIncrementalParse(
686+
node: node,
687+
length: max(lookaheadFurthestOffset - self.lexemes.getOffsetToStart(startToken), node.byteLength + currentToken.byteLength)
688+
)
689+
}
690+
691+
public var lookaheadFurthestOffset: Int {
692+
return lexemes.lookaheadTracker.pointee.furthestOffset
693+
}
694+
}
695+
696+
/// Records the furthest offset from `sourceBufferStart` that is reached by ``Parser.peek()`` or ``Lookahead`` in ``Lexer/LexemeSequence``.
697+
struct LookaheadTracker {
698+
private(set) var furthestOffset: Int = 0
699+
700+
public mutating func recordFurthestOffset(_ furthestOffset: Int) {
701+
/// We may look ahead multiple times to find different valid parts of a node, so we take the maximum of the lookahead offsets as the possible affected range of a node.
702+
self.furthestOffset = max(furthestOffset, self.furthestOffset)
703+
}
704+
}

Sources/SwiftParser/TopLevel.swift

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,12 @@ extension Parser {
151151
/// statement → compiler-control-statement
152152
/// statements → statement statements?
153153
mutating func parseCodeBlockItem(isAtTopLevel: Bool, allowInitDecl: Bool) -> RawCodeBlockItemSyntax? {
154+
let startToken = self.currentToken
155+
if let syntax = self.loadCurrentSyntaxNodeFromCache(for: .codeBlockItem) {
156+
self.registerNodeForIncrementalParse(node: syntax.raw, startToken: startToken)
157+
return RawCodeBlockItemSyntax(syntax.raw)
158+
}
159+
154160
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
155161
return RawCodeBlockItemSyntax(
156162
remainingTokens,
@@ -183,12 +189,17 @@ extension Parser {
183189
if item.raw.isEmpty && semi == nil && trailingSemis.isEmpty {
184190
return nil
185191
}
186-
return RawCodeBlockItemSyntax(
192+
193+
let result = RawCodeBlockItemSyntax(
187194
item: item,
188195
semicolon: semi,
189196
RawUnexpectedNodesSyntax(trailingSemis, arena: self.arena),
190197
arena: self.arena
191198
)
199+
200+
self.registerNodeForIncrementalParse(node: result.raw, startToken: startToken)
201+
202+
return result
192203
}
193204

194205
private mutating func parseStatementItem() -> RawCodeBlockItemSyntax.Item {

Sources/SwiftParser/generated/Parser+Entry.swift

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@ extension Parser {
1919
/// `Parser.init` for more details.
2020
public static func parse(
2121
source: String,
22+
parseNodeAffectRange: IncrementalParseNodeAffectRangeCollector? = nil,
2223
parseTransition: IncrementalParseTransition? = nil
2324
) -> SourceFileSyntax {
24-
var parser = Parser(source)
25+
var parser = Parser(source, parseNodeAffectRange: parseNodeAffectRange, parseTransition: parseTransition)
2526
return SourceFileSyntax.parse(from: &parser)
2627
}
2728

@@ -30,9 +31,10 @@ extension Parser {
3031
public static func parse(
3132
source: UnsafeBufferPointer<UInt8>,
3233
maximumNestingLevel: Int? = nil,
34+
parseNodeAffectRange: IncrementalParseNodeAffectRangeCollector? = nil,
3335
parseTransition: IncrementalParseTransition? = nil
3436
) -> SourceFileSyntax {
35-
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel)
37+
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel, parseNodeAffectRange: parseNodeAffectRange, parseTransition: parseTransition)
3638
return SourceFileSyntax.parse(from: &parser)
3739
}
3840
}

Sources/SwiftSyntax/Raw/RawSyntax.swift

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,20 @@ extension RawSyntax {
918918
}
919919
}
920920

921+
extension RawSyntax: Identifiable {
922+
public struct ID: Hashable {
923+
/// The pointer to the start of the `RawSyntax` node.
924+
private var pointer: UnsafeRawPointer
925+
fileprivate init(_ raw: RawSyntax) {
926+
self.pointer = UnsafeRawPointer(raw.pointer)
927+
}
928+
}
929+
930+
public var id: ID {
931+
return ID(self)
932+
}
933+
}
934+
921935
#if DEBUG
922936
/// See `SyntaxMemoryLayout`.
923937
var RawSyntaxDataMemoryLayouts: [String: SyntaxMemoryLayout.Value] = [

0 commit comments

Comments
 (0)