Skip to content

Port incremental parsing ability to CodeBlockItem #1685

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
/// Parse the source code in the given string as Swift source file. See
/// `Parser.init` for more details.
public static func parse(
source: String,
parseTransition: IncrementalParseTransition? = nil
source: String
) -> SourceFileSyntax {
var parser = Parser(source)
return SourceFileSyntax.parse(from: &parser)
Expand All @@ -35,18 +34,66 @@ let parserEntryFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {

DeclSyntax(
"""
/// Parse the source code in the given string as Swift source file. See
/// Parse the source code in the given buffer as Swift source file. See
/// `Parser.init` for more details.
public static func parse(
source: UnsafeBufferPointer<UInt8>,
maximumNestingLevel: Int? = nil,
parseTransition: IncrementalParseTransition? = nil
maximumNestingLevel: Int? = nil
) -> SourceFileSyntax {
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel)
return SourceFileSyntax.parse(from: &parser)
}
"""
)

DeclSyntax(
"""
/// Parse the source code in the given string as Swift source file with support
/// for incremental parsing.
///
/// When parsing a source file for the first time, invoke `parseIncrementally`
/// with `parseTransition: nil`. This returns the initial tree as well as
/// ``LookaheadRanges``. If an edit is made to the source file, an
/// ``IncrementalParseTransition`` can be constructed from the initial tree
/// and its ``LookaheadRanges``. When invoking `parseIncrementally` again with
/// the post-edit source and that parse transition, the parser will re-use
/// nodes that haven’t changed.
///
/// - Parameters:
/// - source: The source code to parse
/// - parseTransition: If a similar source file has already been parsed, the
/// ``IncrementalParseTransition`` that contains the previous tree as well
/// as the edits that were performed to it.
/// - Returns: The parsed tree as well as the ``LookaheadRanges`` that describe
/// how far the parser looked ahead while parsing a node, which is
/// necessary to construct an ``IncrementalParseTransition`` for a
/// subsequent incremental parse
public static func parseIncrementally(
source: String,
parseTransition: IncrementalParseTransition?
) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) {
var parser = Parser(source, parseTransition: parseTransition)
return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges)
}
"""
)

DeclSyntax(
"""
/// Parse the source code in the given buffer as Swift source file with support
/// for incremental parsing.
///
/// See doc comments in ``Parser/parseIncrementally(source:parseTransition:)``
public static func parseIncrementally(
source: UnsafeBufferPointer<UInt8>,
maximumNestingLevel: Int? = nil,
parseTransition: IncrementalParseTransition?
) -> (tree: SourceFileSyntax, lookaheadRanges: LookaheadRanges) {
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel, parseTransition: parseTransition)
return (SourceFileSyntax.parse(from: &parser), parser.lookaheadRanges)
}
"""
)
}

DeclSyntax(
Expand Down
65 changes: 39 additions & 26 deletions Sources/SwiftParser/IncrementalParseTransition.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,30 @@
//===----------------------------------------------------------------------===//

@_spi(RawSyntax) import SwiftSyntax

extension Parser {
  /// Try to re-use a node of `kind` from the previous parse at the current
  /// token's start offset.
  ///
  /// If the parse lookup yields a node, the lexeme sequence is advanced by the
  /// node's byte size and `currentToken` is updated accordingly.
  ///
  /// - Parameter kind: The syntax kind of the node that is about to be parsed.
  /// - Returns: The node found by the parse lookup, or `nil` if there is no
  ///   parse lookup or the lookup did not produce a node.
  mutating func loadCurrentSyntaxNodeFromCache(for kind: SyntaxKind) -> Syntax? {
    // Without a parse lookup there is nothing to re-use; bail out before
    // computing any offsets.
    guard parseLookup != nil else {
      return nil
    }

    let tokenStartOffset = lexemes.getOffsetToStart(currentToken)
    guard let reusedNode = parseLookup?.lookUp(tokenStartOffset, kind: kind) else {
      return nil
    }

    // Skip over the source text that the re-used node covers.
    lexemes.advance(by: reusedNode.byteSize, currentToken: &currentToken)
    return reusedNode
  }

  /// Record in `lookaheadRanges` how far the lexer got relative to the start
  /// of `startToken` for `node`.
  ///
  /// - Parameters:
  ///   - node: The node to register.
  ///   - startToken: The token whose start offset is subtracted from the
  ///     lookahead tracker's furthest offset to form the lookahead length.
  mutating func registerNodeForIncrementalParse(node: RawSyntax, startToken: Lexer.Lexeme) {
    let furthestOffset = lexemes.lookaheadTracker.pointee.furthestOffset
    let startOffset = lexemes.getOffsetToStart(startToken)
    lookaheadRanges.registerNodeForIncrementalParse(
      node: node,
      lookaheadLength: furthestOffset - startOffset
    )
  }
}

/// Accepts the ``Syntax`` nodes that `IncrementalParseTransition`
/// determined should be re-used for a parse invocation.
///
Expand All @@ -20,26 +44,25 @@
/// This is also used for testing purposes to ensure incremental reparsing
/// worked as expected.
public protocol IncrementalParseReusedNodeDelegate {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely not part of this PR, but I think we should consider changing IncrementalParseReusedNodeDelegate to just be a callback function on IncrementalParseTransition, i.e. removing this protocol altogether. That’s more Swifty than the Objective-C delegate pattern.

/// Accepts the range and ``Syntax`` node of skipped source region.
/// Accepts ``Syntax`` node of skipped source region.
///
/// - Parameters:
/// - range: The source region of the currently parsed source.
/// - previousNode: The node from the previous tree that is associated with
/// the skipped source region.
func parserReusedNode(range: ByteSourceRange, previousNode: Syntax)
func parserReusedNode(previousNode: Syntax)
}

/// An implementation of `IncrementalParseReusedNodeDelegate` that just collects
/// the range and re-used node into an array.
public final class IncrementalParseReusedNodeCollector:
IncrementalParseReusedNodeDelegate
{
public var rangeAndNodes: [(ByteSourceRange, Syntax)] = []
public var nodes: [Syntax] = []

public init() {}

public func parserReusedNode(range: ByteSourceRange, previousNode: Syntax) {
rangeAndNodes.append((range, previousNode))
public func parserReusedNode(previousNode: Syntax) {
nodes.append(previousNode)
}
}

Expand All @@ -48,6 +71,7 @@ public final class IncrementalParseReusedNodeCollector:
public final class IncrementalParseTransition {
fileprivate let previousTree: SourceFileSyntax
fileprivate let edits: ConcurrentEdits
fileprivate let lookaheadRanges: LookaheadRanges
fileprivate let reusedDelegate: IncrementalParseReusedNodeDelegate?

/// - Parameters:
Expand All @@ -59,17 +83,19 @@ public final class IncrementalParseTransition {
public init(
previousTree: SourceFileSyntax,
edits: ConcurrentEdits,
lookaheadRanges: LookaheadRanges,
reusedNodeDelegate: IncrementalParseReusedNodeDelegate? = nil
) {
self.previousTree = previousTree
self.edits = edits
self.lookaheadRanges = lookaheadRanges
self.reusedDelegate = reusedNodeDelegate
}
}

/// Provides a mechanism for the parser to skip regions of an incrementally
/// updated source that was already parsed during a previous parse invocation.
public struct IncrementalParseLookup {
struct IncrementalParseLookup {
fileprivate let transition: IncrementalParseTransition
fileprivate var cursor: SyntaxCursor

Expand Down Expand Up @@ -100,23 +126,21 @@ public struct IncrementalParseLookup {
/// - Returns: A ``Syntax`` node from the previous parse invocation,
/// representing the contents of this region, if it is still valid
/// to re-use. `nil` otherwise.
@_spi(RawSyntax)
public mutating func lookUp(_ newOffset: Int, kind: SyntaxKind) -> Syntax? {
fileprivate mutating func lookUp(_ newOffset: Int, kind: SyntaxKind) -> Syntax? {
guard let prevOffset = translateToPreEditOffset(newOffset) else {
return nil
}
let prevPosition = AbsolutePosition(utf8Offset: prevOffset)
let node = cursorLookup(prevPosition: prevPosition, kind: kind)
if let delegate = reusedDelegate, let node {
delegate.parserReusedNode(
range: ByteSourceRange(offset: newOffset, length: node.byteSize),
previousNode: node
)
}
return node
}

mutating fileprivate func cursorLookup(
fileprivate mutating func cursorLookup(
prevPosition: AbsolutePosition,
kind: SyntaxKind
) -> Syntax? {
Expand Down Expand Up @@ -148,24 +172,13 @@ public struct IncrementalParseLookup {
return true
}

// Node can also not be reused if an edit has been made in the next token's
// text, e.g. because `private struct Foo {}` parses as a CodeBlockItem with
// a StructDecl inside and `private struc Foo {}` parses as two
// CodeBlockItems one for `private` and one for `struc Foo {}`
var nextLeafNodeLength: SourceLength = .zero
if let nextSibling = cursor.nextSibling {
// Fast path check: if next sibling is before all the edits then we can
// re-use the node.
if !edits.edits.isEmpty && edits.edits.first!.range.offset > nextSibling.endPosition.utf8Offset {
return true
}
if let nextToken = nextSibling.firstToken(viewMode: .sourceAccurate) {
nextLeafNodeLength = nextToken.leadingTriviaLength + nextToken.contentLength
}
guard let nodeAffectRangeLength = transition.lookaheadRanges.lookaheadRanges[node.raw.id] else {
return false
}

let nodeAffectRange = ByteSourceRange(
offset: node.position.utf8Offset,
length: (node.totalLength + nextLeafNodeLength).utf8Length
length: nodeAffectRangeLength
)

for edit in edits.edits {
Expand Down
44 changes: 41 additions & 3 deletions Sources/SwiftParser/Lexer/LexemeSequence.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,24 +32,60 @@ extension Lexer {
/// usually less than 0.1% of the memory allocated by the syntax arena.
var lexerStateAllocator = BumpPtrAllocator(slabSize: 256)

fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor) {
/// The offset of the trailing trivia end of `nextToken`, measured from the
/// start of the source buffer.
var offsetToNextTokenEnd: Int {
  let startOffset = getOffsetToStart(nextToken)
  return startOffset + nextToken.byteLength
}

/// See doc comments in ``LookaheadTracker``
///
/// This is an `UnsafeMutablePointer` for two reasons
/// - When `LexemeSequence` gets copied (e.g. when a ``Lookahead`` gets created), it should still reference the same ``LookaheadTracker`` so that any lookahead performed in the ``Lookahead`` also affects the original ``Parser``. It thus needs to be a reference type
/// - ``LookaheadTracker`` is not a class to avoid reference counting it. The ``Parser`` that creates the ``LexemeSequence`` will always outlive any ``Lookahead`` created for it.
let lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>

/// Create a lexeme sequence and lex the first token into `nextToken`.
///
/// - Parameters:
///   - sourceBufferStart: Cursor stored as `sourceBufferStart`; token offsets
///     are measured relative to it.
///   - cursor: The cursor from which lexing starts.
///   - lookaheadTracker: Pointer to the tracker that records the furthest
///     offset reached by this sequence.
fileprivate init(sourceBufferStart: Lexer.Cursor, cursor: Lexer.Cursor, lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>) {
self.sourceBufferStart = sourceBufferStart
self.cursor = cursor
// Lex the first token right away so that `nextToken` is always populated.
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
self.lookaheadTracker = lookaheadTracker
}

/// Return the next lexeme by forwarding to `advance()`.
@_spi(Testing)
public mutating func next() -> Lexer.Lexeme? {
  advance()
}

/// Report the end offset of `nextToken` to the ``LookaheadTracker`` as the
/// furthest offset.
private func recordNextTokenInLookaheadTracker() {
  let endOffset = offsetToNextTokenEnd
  lookaheadTracker.pointee.recordFurthestOffset(endOffset)
}

/// Consume and return `nextToken`, then lex the token that follows it.
///
/// The end offset of the returned token is recorded in the lookahead tracker
/// before the sequence moves on.
mutating func advance() -> Lexer.Lexeme {
  recordNextTokenInLookaheadTracker()
  // Keep a copy of the token being handed out before it is replaced.
  let consumedToken = nextToken
  nextToken = cursor.nextToken(sourceBufferStart: sourceBufferStart, stateAllocator: lexerStateAllocator)
  return consumedToken
}

/// The distance from `sourceBufferStart` to the cursor of `token`, i.e. the
/// offset of the token's leading trivia start.
func getOffsetToStart(_ token: Lexer.Lexeme) -> Int {
  sourceBufferStart.distance(to: token.cursor)
}
Comment on lines +73 to +75
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not part of this PR: Can we use getOffsetToStart instead of offset(of:)?


/// Move the cursor `offset` bytes past the cursor of `currentToken`, re-lex
/// from there and reset `currentToken`.
///
/// - Important: This should only be used for incremental parsing.
mutating func advance(by offset: Int, currentToken: inout Lexer.Lexeme) {
  // Start from where `currentToken` was lexed and skip `offset` bytes.
  var repositionedCursor = currentToken.cursor
  repositionedCursor.position = repositionedCursor.position.advanced(by: offset)
  cursor = repositionedCursor

  // Re-prime `nextToken` at the new position, then hand it out as the new
  // current token.
  nextToken = cursor.nextToken(sourceBufferStart: sourceBufferStart, stateAllocator: lexerStateAllocator)

  currentToken = advance()
}

/// Reset the lexeme sequence to the state we were in when lexing `splitToken`
/// but after we consumed `consumedPrefix` bytes from `splitToken`.
/// - Warning: Do not add more usages of this function.
Expand All @@ -63,6 +99,7 @@ extension Lexer {
}

/// Return `nextToken` without consuming it.
///
/// The token's end offset is still recorded in the lookahead tracker.
func peek() -> Lexer.Lexeme {
  recordNextTokenInLookaheadTracker()
  let upcomingToken = nextToken
  return upcomingToken
}

Expand Down Expand Up @@ -104,12 +141,13 @@ extension Lexer {
@_spi(Testing)
public static func tokenize(
_ input: UnsafeBufferPointer<UInt8>,
from startIndex: Int = 0
from startIndex: Int = 0,
lookaheadTracker: UnsafeMutablePointer<LookaheadTracker>
) -> LexemeSequence {
precondition(input.isEmpty || startIndex < input.endIndex)
let startChar = startIndex == input.startIndex ? UInt8(ascii: "\0") : input[startIndex - 1]
let start = Cursor(input: input, previous: UInt8(ascii: "\0"))
let cursor = Cursor(input: UnsafeBufferPointer(rebasing: input[startIndex...]), previous: startChar)
return LexemeSequence(sourceBufferStart: start, cursor: cursor)
return LexemeSequence(sourceBufferStart: start, cursor: cursor, lookaheadTracker: lookaheadTracker)
}
}
Loading